Example #1
def make_deep_lstm(size_input, size_mem, n_layers, size_output, size_batch):
    inputs = [cgt.matrix(fixed_shape=(size_batch, size_input))]
    for _ in xrange(2 * n_layers):
        inputs.append(cgt.matrix(fixed_shape=(size_batch, size_mem)))
    outputs = []
    for i_layer in xrange(n_layers):
        # note that inputs[0] is the external input, so the (h, c) pairs start at 1
        prev_h = inputs[i_layer * 2 + 1]
        prev_c = inputs[i_layer * 2 + 2]
        if i_layer == 0:
            x = inputs[0]
            size_x = size_input
        else:
            x = outputs[(i_layer - 1) * 2 + 1]  # previous layer's h (outputs hold (c, h) pairs)
            size_x = size_mem
        input_sums = nn.Affine(size_x, 4 * size_mem)(x) + nn.Affine(
            size_mem, 4 * size_mem)(prev_h)
        sigmoid_chunk = cgt.sigmoid(input_sums[:, 0:3 * size_mem])
        in_gate = sigmoid_chunk[:, 0:size_mem]
        forget_gate = sigmoid_chunk[:, size_mem:2 * size_mem]
        out_gate = sigmoid_chunk[:, 2 * size_mem:3 * size_mem]
        in_transform = cgt.tanh(input_sums[:, 3 * size_mem:4 * size_mem])
        next_c = forget_gate * prev_c + in_gate * in_transform
        next_h = out_gate * cgt.tanh(next_c)
        outputs.append(next_c)
        outputs.append(next_h)

    category_activations = nn.Affine(size_mem, size_output)(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    return nn.Module(inputs, outputs)
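
For reference, the column slices above implement the standard LSTM update; the four size_mem-wide blocks of input_sums correspond to the input gate i, forget gate f, output gate o, and candidate g:

$$
\begin{aligned}
i_t &= \sigma(W_i x_t + U_i h_{t-1}), &\quad
f_t &= \sigma(W_f x_t + U_f h_{t-1}), \\
o_t &= \sigma(W_o x_t + U_o h_{t-1}), &\quad
g_t &= \tanh(W_g x_t + U_g h_{t-1}), \\
c_t &= f_t \odot c_{t-1} + i_t \odot g_t, &\quad
h_t &= o_t \odot \tanh(c_t)
\end{aligned}
$$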
Example #2
def make_deep_rrnn_rot_relu(size_input, size_mem, n_layers, size_output,
                            size_batch_in, k_in, k_h):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        # note that inputs[0] is the external input, so we add 1
        prev_h = inputs[i_layer + 1]
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem
        size_batch = prev_h.shape[0]

        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True

        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        add_in_lin = nn.Affine(size_x, size_mem)(x)
        add_in_relu = nn.rectify(add_in_lin)

        prev_h_scaled = nn.scale_mag(prev_h)

        h_in_added = prev_h_scaled + add_in_relu
        inters_h = [h_in_added]

        colon = slice(None, None, None)

        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = xform_h[i, :]
            #r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)
        next_h = inters_h[-1]
        outputs.append(next_h)

    category_activations = nn.Affine(size_mem, size_output,
                                     name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    #print 'len outputs:', len(outputs)
    #print 'len inputs:', len(inputs)

    return nn.Module(inputs, outputs)
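
Each pass through the inner loop above applies a Householder reflection h -> h - 2(h.r)r with a unit row r of xform_h, which is why the rows are normalized first. A standalone NumPy sanity check of the norm-preserving property this relies on (not part of the original code):

import numpy as np

rng = np.random.RandomState(0)
r = rng.randn(4)
r /= np.linalg.norm(r)          # unit reflection axis, like a row of xform_h
h = rng.randn(4)
h_ref = h - 2 * h.dot(r) * r    # Householder reflection
assert np.allclose(np.linalg.norm(h), np.linalg.norm(h_ref))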
Example #3
def build_fc_return_loss(X, y):
    """
    Build a fully connected network and return the loss
    """
    np.random.seed(0)
    h1 = nn.rectify(
        nn.Affine(28 * 28, 256, weight_init=nn.IIDGaussian(std=.1))(X))
    h2 = nn.rectify(
        nn.Affine(256, 256, weight_init=nn.IIDGaussian(std=.1))(h1))
    logprobs = nn.logsoftmax(
        nn.Affine(256, 10, weight_init=nn.IIDGaussian(std=.1))(h2))
    neglogliks = -logprobs[cgt.arange(X.shape[0]), y]
    loss = neglogliks.mean()
    return loss
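
A minimal sketch of compiling this loss into a training step with plain SGD (the input shapes and the 'i8' label dtype are assumptions; only calls that appear elsewhere on this page are used):

X = cgt.matrix("X", fixed_shape=(None, 28 * 28))
y = cgt.vector("y", dtype='i8')
loss = build_fc_return_loss(X, y)
params = nn.get_parameters(loss)
gparams = cgt.grad(loss, params)
updates = [(p, p - 1e-2 * gp) for (p, gp) in zip(params, gparams)]
train = cgt.function([X, y], loss, updates=updates)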
Example #4
File: sfnn.py Project: TZ2016/snn
def hybrid_network(size_in, size_out, num_units, num_stos, dbg_out=None):
    if dbg_out is None:
        dbg_out = {}  # avoid a shared mutable default argument
    assert len(num_units) == len(num_stos)
    net_in = cgt.matrix("X", fixed_shape=(None, size_in))
    prev_num_units, prev_out = size_in, net_in
    dbg_out['NET~in'] = net_in
    curr_layer = 1
    for (curr_num_units, curr_num_sto) in zip(num_units, num_stos):
        assert curr_num_units >= curr_num_sto >= 0
        prev_out = combo_layer(
            prev_out,
            prev_num_units,
            curr_num_units, (curr_num_sto, ),
            s_funcs=s_func_ip,
            o_funcs=(lambda x: cgt.bernoulli(cgt.sigmoid(x)), cgt.nn.rectify),
            name=str(curr_layer),
            dbg_out=dbg_out)
        dbg_out['L%d~out' % curr_layer] = prev_out
        prev_num_units = curr_num_units
        curr_layer += 1
    net_out = nn.Affine(prev_num_units,
                        size_out,
                        name="InnerProd(%d->%d)" %
                        (prev_num_units, size_out))(prev_out)
    dbg_out['NET~out'] = net_out
    return net_in, net_out
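
Hypothetical usage of the network above (combo_layer and s_func_ip come from the surrounding project and are not shown in this excerpt):

dbg = {}
net_in, net_out = hybrid_network(16, 3, [32, 32], [2, 2], dbg_out=dbg)
f = cgt.function([net_in], net_out)  # dbg now maps layer names to graph nodes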
Example #5
def lstm_network_t(size_in, size_out, num_units, num_mems, dbg_out=None):
    if dbg_out is None:
        dbg_out = {}  # avoid a shared mutable default argument

    def s_func_lstm(_in, _s_in, _s_out, name=''):
        c_prev = cgt.matrix(fixed_shape=(None, _s_out))
        h_prev = cgt.matrix(fixed_shape=(None, _s_out))
        c_cur, h_cur = lstm_block(h_prev, c_prev, _in, _s_in, _s_out, name)
        net_c_prev.append(c_prev)
        net_h_prev.append(h_prev)
        net_c_curr.append(c_cur)
        net_h_curr.append(h_cur)
        return h_cur

    assert len(num_units) == len(num_mems)
    net_c_prev, net_h_prev, net_c_curr, net_h_curr = [], [], [], []
    net_in = cgt.matrix(fixed_shape=(None, size_in))
    prev_num_units, prev_out = size_in, net_in
    curr_layer = 1
    for curr_num_units, curr_num_mem in zip(num_units, num_mems):
        assert curr_num_units >= curr_num_mem >= 0
        prev_out = combo_layer(prev_out,
                               prev_num_units,
                               curr_num_units, (curr_num_mem, ),
                               s_funcs=(s_func_lstm, s_func_ip),
                               o_funcs=(None, cgt.sigmoid),
                               name=str(curr_layer),
                               dbg_out=dbg_out)
        dbg_out['L%d~out' % curr_layer] = prev_out
        prev_num_units = curr_num_units
        curr_layer += 1
    net_out = nn.Affine(prev_num_units, size_out, name="Out")(prev_out)
    dbg_out['NET~out'] = net_out
    return net_in, net_out, net_c_prev, net_h_prev, net_c_curr, net_h_curr
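
The returned lists make the recurrent state explicit: a compiled step function takes the previous per-layer c/h matrices alongside the input and returns the new ones. A minimal sketch of that wiring (combo_layer, lstm_block, and s_func_ip are assumed importable from the same project; sizes are arbitrary):

net_in, net_out, c_prev, h_prev, c_cur, h_cur = lstm_network_t(8, 4, [16], [16])
step = cgt.function([net_in] + c_prev + h_prev, [net_out] + c_cur + h_cur)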
Example #6
def hybrid_network(size_in, size_out, num_units, num_stos, dbg_out=None):
    if dbg_out is None:
        dbg_out = []  # avoid a shared mutable default argument
    assert len(num_units) == len(num_stos)
    X = cgt.matrix("X", fixed_shape=(None, size_in))
    prev_num_units, prev_out = size_in, X
    dbg_out.append(X)
    for (curr_num_units, curr_num_sto) in zip(num_units, num_stos):
        _layer_dbg_out = []
        prev_out = hybrid_layer(prev_out,
                                prev_num_units,
                                curr_num_units,
                                curr_num_sto,
                                dbg_out=_layer_dbg_out)
        prev_num_units = curr_num_units
        dbg_out.extend(_layer_dbg_out)
        dbg_out.append(prev_out)
    # TODO_TZ bigger problem! param cannot deterministically influence cost
    #         otherwise the surrogate cost is not complete log likelihood
    net_out = nn.Affine(prev_num_units,
                        size_out,
                        name="InnerProd(%d->%d)" %
                        (prev_num_units, size_out))(prev_out)
    dbg_out.append(net_out)
    # assert prev_num_units == size_out
    # net_out = prev_out
    return X, net_out
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--profile",action="store_true")
    parser.add_argument("--unittest",action="store_true")
    parser.add_argument("--epochs",type=int,default=10)
    args = parser.parse_args()

    batchsize = 64
    Xshape = (batchsize, 3, 32, 32)
    X = cgt.tensor4("X", fixed_shape=Xshape)
    y = cgt.vector("y", fixed_shape=(batchsize,), dtype='i4')

    conv1 = nn.SpatialConvolution(3, 32, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=1e-4))(X)
    relu1 = nn.rectify(conv1)
    pool1 = nn.max_pool_2d(relu1, kernelshape=(3,3), stride=(2,2))
    conv2 = nn.SpatialConvolution(32, 32, kernelshape=(5,5), pad=(2,2),
        weight_init=nn.IIDGaussian(std=0.01))(pool1)
    relu2 = nn.rectify(conv2)
    pool2 = nn.max_pool_2d(relu2, kernelshape=(3,3), stride=(2,2))
    conv3 = nn.SpatialConvolution(32, 64, kernelshape=(5,5), pad=(2,2), 
        weight_init=nn.IIDGaussian(std=0.01))(pool2)
    pool3 = nn.max_pool_2d(conv3, kernelshape=(3,3), stride=(2,2))
    relu3 = nn.rectify(pool3)
    d0,d1,d2,d3 = relu3.shape
    flatlayer = relu3.reshape([d0,d1*d2*d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    ip1 = nn.Affine(nfeats, 10)(flatlayer)
    logprobs = nn.logsoftmax(ip1)
    loss = -logprobs[cgt.arange(batchsize), y].mean()

    params = nn.get_parameters(loss)
    updates = rmsprop_updates(loss, params, stepsize=1e-3)
    
    train = cgt.function(inputs=[X, y], outputs=[loss], updates=updates)

    if args.profile: cgt.profiler.start()

    data = np.load("/Users/joschu/Data/cifar-10-batches-py/cifar10.npz")
    Xtrain = data["X_train"]
    ytrain = data["y_train"]

    print fmt_row(10, ["Epoch","Train NLL","Train Err","Test NLL","Test Err","Epoch Time"])
    for i_epoch in xrange(args.epochs):
        for start in xrange(0, Xtrain.shape[0], batchsize):
            tstart = time.time()
            end = start+batchsize
            print train(Xtrain[start:end], ytrain[start:end]), time.time()-tstart
            if start > batchsize*5: break
        # elapsed = time.time() - tstart
        # trainerr, trainloss = computeloss(Xtrain[:len(Xtest)], ytrain[:len(Xtest)])
        # testerr, testloss = computeloss(Xtest, ytest)
        # print fmt_row(10, [i_epoch, trainloss, trainerr, testloss, testerr, elapsed])
        if args.profile: 
            cgt.profiler.print_stats()
            return
        if args.unittest:
            break
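
rmsprop_updates is defined elsewhere in this demo and not shown here; a plain-SGD stand-in with the same interface, using only the update-list pattern from Example #11, might look like:

def sgd_updates(loss, params, stepsize):
    # Hypothetical stand-in for rmsprop_updates: vanilla gradient descent.
    return [(p, p - stepsize * gp)
            for (p, gp) in zip(params, cgt.grad(loss, params))]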
Example #8
def make_deep_gru(size_input, size_mem, n_layers, size_output, size_batch):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    for i_layer in xrange(n_layers):
        # note that inputs[0] is the external input, so we add 1
        prev_h = inputs[i_layer + 1]
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem
        update_gate = cgt.sigmoid(
            nn.Affine(size_x, size_mem, name="i2u")(x) +
            nn.Affine(size_mem, size_mem, name="h2u")(prev_h))
        reset_gate = cgt.sigmoid(
            nn.Affine(size_x, size_mem, name="i2r")(x) +
            nn.Affine(size_mem, size_mem, name="h2r")(prev_h))
        gated_hidden = reset_gate * prev_h
        p2 = nn.Affine(size_mem, size_mem)(gated_hidden)
        p1 = nn.Affine(size_x, size_mem)(x)
        hidden_target = cgt.tanh(p1 + p2)
        next_h = (1.0 - update_gate) * prev_h + update_gate * hidden_target
        outputs.append(next_h)
    category_activations = nn.Affine(size_mem, size_output,
                                     name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    return nn.Module(inputs, outputs)
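
For reference, this is the standard GRU update, with update_gate = z, reset_gate = r, and hidden_target playing the role of the candidate state:

$$
\begin{aligned}
z_t &= \sigma(W_z x_t + U_z h_{t-1}), \qquad
r_t = \sigma(W_r x_t + U_r h_{t-1}), \\
\tilde{h}_t &= \tanh(W x_t + U (r_t \odot h_{t-1})), \qquad
h_t = (1 - z_t) \odot h_{t-1} + z_t \odot \tilde{h}_t
\end{aligned}
$$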
Example #9
def lstm_block(h_prev, c_prev, x_curr, size_x, size_c, name=''):
    """
    Construct a LSTM cell block of specified number of cells

    :param h_prev: self activations at previous time step
    :param c_prev: self memory state at previous time step
    :param x_curr: inputs from previous layer at current time step
    :param size_x: size of inputs
    :param size_c: size of both c and h
    :return: c and h at current time step
    :rtype:
    """
    input_sums = nn.Affine(size_x, 4 * size_c, name=name+'*x')(x_curr) + \
                 nn.Affine(size_c, 4 * size_c, name=name+'*h')(h_prev)
    c_new = cgt.tanh(input_sums[:, 3 * size_c:])
    sigmoid_chunk = cgt.sigmoid(input_sums[:, :3 * size_c])
    in_gate = sigmoid_chunk[:, :size_c]
    forget_gate = sigmoid_chunk[:, size_c:2 * size_c]
    out_gate = sigmoid_chunk[:, 2 * size_c:3 * size_c]
    c_curr = forget_gate * c_prev + in_gate * c_new
    h_curr = out_gate * cgt.tanh(c_curr)
    return c_curr, h_curr
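
A minimal sketch of compiling one step of this block into a callable (the sizes are arbitrary assumptions):

size_x, size_c, size_batch = 16, 32, 8
x = cgt.matrix(fixed_shape=(size_batch, size_x))
h_prev = cgt.matrix(fixed_shape=(size_batch, size_c))
c_prev = cgt.matrix(fixed_shape=(size_batch, size_c))
c_cur, h_cur = lstm_block(h_prev, c_prev, x, size_x, size_c, name='l1')
step = cgt.function([x, h_prev, c_prev], [c_cur, h_cur])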
Example #10
    def __init__(self, n_actions):
        Serializable.__init__(self, n_actions)
        cgt.set_precision('double')
        n_in = 128
        o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
        a_n = cgt.vector("a_n", dtype='i8')
        q_n = cgt.vector("q_n")
        oldpdist_np = cgt.matrix("oldpdists")

        h0 = (o_no - 128.0) / 128.0
        nhid = 64
        h1 = cgt.tanh(
            nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(h0))
        probs_na = nn.softmax(
            nn.Affine(nhid, n_actions,
                      weight_init=nn.IIDGaussian(std=0.01))(h1))
        logprobs_na = cgt.log(probs_na)
        b = cgt.size(o_no, 0)
        logps_n = logprobs_na[cgt.arange(b), a_n]
        surr = (logps_n * q_n).mean()
        kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

        params = nn.get_parameters(surr)
        gradsurr = cgt.grad(surr, params)
        flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._f_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_n, q_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], probs_na)
        self.f_probs = self.f_pdist  # same signature; reuse the compiled function
        self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n],
                                      [surr, kl])
        self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

        self.pc = ParamCollection(params)
Example #11
    def __init__(self, num_features=None, num_hidden=100):
        stepsize = 0.01
        # X: a single observation, with shape (1, num_features)
        X = cgt.matrix("X", fixed_shape=(1, num_features))
        # y: a symbolic variable for the scalar reward target
        y = cgt.scalar("y", dtype='float64')

        hid1 = nn.rectify(
            nn.Affine(num_features,
                      num_hidden,
                      weight_init=nn.IIDGaussian(std=.1),
                      bias_init=nn.Constant(1))(X))
        # One final fully-connected layer, and then a linear activation output for reward
        output = nn.Affine(num_hidden,
                           1,
                           weight_init=nn.IIDGaussian(std=.1),
                           bias_init=nn.Constant(1))(hid1)
        abs_deviation = cgt.abs(output - y).mean()
        params = nn.get_parameters(abs_deviation)
        gparams = cgt.grad(abs_deviation, params)

        updates = [(p, p - stepsize * gp) for (p, gp) in zip(params, gparams)]
        self.predictor = cgt.function([X], output)
        self.updater = cgt.function([X, y], abs_deviation, updates=updates)
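
Hypothetical usage; the enclosing class is not named in this excerpt, so RewardPredictor below is a stand-in name:

import numpy as np

model = RewardPredictor(num_features=8)   # hypothetical class name
x = np.random.randn(1, 8)
print model.predictor(x)      # predicted reward for one observation
print model.updater(x, 1.0)   # one SGD step; returns the mean abs deviation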
Example #12
def hybrid_layer(X, size_in, size_out, size_random, dbg_out=None):
    if dbg_out is None:
        dbg_out = []  # avoid a shared mutable default argument
    assert size_out >= size_random >= 0
    out = cgt.sigmoid(
        nn.Affine(size_in,
                  size_out,
                  name="InnerProd(%d->%d)" % (size_in, size_out))(X))
    dbg_out.append(out)
    if size_random == 0:
        return out
    if size_random == size_out:
        out_s = cgt.bernoulli(out)
        return out_s
    out_s = cgt.bernoulli(out[:, :size_random])
    out = cgt.concatenate([out_s, out[:, size_random:]], axis=1)
    return out
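
A sketch of compiling the layer (sizes are arbitrary assumptions). Note that the first size_random output columns are Bernoulli samples, so repeated calls on the same input will differ:

X = cgt.matrix(fixed_shape=(None, 20))
out = hybrid_layer(X, 20, 10, 4)
f = cgt.function([X], out)  # columns 0..3 are sampled, columns 4..9 are sigmoids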
Example #13
def build_convnet_return_loss(X, y):
    np.random.seed(0)
    conv1 = nn.rectify(
        nn.SpatialConvolution(1,
                              32,
                              kernelshape=(3, 3),
                              pad=(0, 0),
                              weight_init=nn.IIDGaussian(std=.1))(X))
    pool1 = nn.max_pool_2d(conv1, kernelshape=(3, 3), stride=(2, 2))
    conv2 = nn.rectify(
        nn.SpatialConvolution(32,
                              32,
                              kernelshape=(3, 3),
                              pad=(0, 0),
                              weight_init=nn.IIDGaussian(std=.1))(pool1))
    pool2 = nn.max_pool_2d(conv2, kernelshape=(3, 3), stride=(2, 2))
    d0, d1, d2, d3 = pool2.shape
    flatlayer = pool2.reshape([d0, d1 * d2 * d3])
    nfeats = cgt.infer_shape(flatlayer)[1]
    logprobs = nn.logsoftmax(nn.Affine(nfeats, 10)(flatlayer))
    loss = -logprobs[cgt.arange(X.shape[0]), y].mean()
    return loss
Example #14
import cgt
from cgt import nn, utils
import numpy as np, numpy.random as nr
from numpy.linalg import norm
from param_collection import ParamCollection

k_in = 1
size_x = 3
size_mem = 4
size_batch = 4

x = cgt.matrix(fixed_shape=(size_batch, size_x))
prev_h = cgt.matrix(fixed_shape=(size_batch, size_mem))
r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
r_norm = cgt.norm(r_non, axis=2, keepdims=True)
r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")
prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
inters = [prev_h_3]

for i in xrange(k_in * 2):
    inter_in = inters[-1]
    r_cur = r[:, i, :]
    r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
    r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
    ref_cur = cgt.batched_matmul(
        r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
    inter_out = inter_in - ref_cur
    inters.append(inter_out)
h = inters[-1]
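
Because each r_cur row is unit-normalized, every pass of the loop subtracts the component of the state along r_cur once, i.e. it projects onto the orthogonal complement of r_cur; Example #2, by contrast, subtracts it twice (a Householder reflection). A minimal sketch of compiling the chain above (the nn.Affine parameters are baked into the compiled function):

step = cgt.function([x, prev_h], h)  # h has shape (size_batch, size_mem, 1)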
Example #15
def s_func_ip(X, size_in, size_out, name):
    return nn.Affine(size_in, size_out, name=name)(X)
Example #16
    def __init__(self, obs_dim, ctrl_dim):

        cgt.set_precision('double')
        Serializable.__init__(self, obs_dim, ctrl_dim)

        self.obs_dim = obs_dim
        self.ctrl_dim = ctrl_dim

        o_no = cgt.matrix("o_no", fixed_shape=(None, obs_dim))
        a_na = cgt.matrix("a_na", fixed_shape=(None, ctrl_dim))
        adv_n = cgt.vector("adv_n")
        oldpdist_np = cgt.matrix("oldpdist", fixed_shape=(None, 2 * ctrl_dim))
        self.logstd = logstd_1a = nn.parameter(np.zeros((1, self.ctrl_dim)),
                                               name="std_1a")
        std_1a = cgt.exp(logstd_1a)

        # Here's where we apply the network
        h0 = o_no
        nhid = 32
        h1 = cgt.tanh(
            nn.Affine(obs_dim, nhid, weight_init=nn.IIDGaussian(std=0.1))(h0))
        h2 = cgt.tanh(
            nn.Affine(nhid, nhid, weight_init=nn.IIDGaussian(std=0.1))(h1))
        mean_na = nn.Affine(nhid,
                            ctrl_dim,
                            weight_init=nn.IIDGaussian(std=0.01))(h2)

        b = cgt.size(o_no, 0)
        std_na = cgt.repeat(std_1a, b, axis=0)

        oldmean_na = oldpdist_np[:, 0:self.ctrl_dim]
        oldstd_na = oldpdist_np[:, self.ctrl_dim:2 * self.ctrl_dim]

        logp_n = ((-.5) * cgt.square(
            (a_na - mean_na) / std_na).sum(axis=1)) - logstd_1a.sum()
        oldlogp_n = ((-.5) * cgt.square(
            (a_na - oldmean_na) / oldstd_na).sum(axis=1)
                     ) - cgt.log(oldstd_na).sum(axis=1)

        ratio_n = cgt.exp(logp_n - oldlogp_n)

        surr = (ratio_n * adv_n).mean()

        pdists_np = cgt.concatenate([mean_na, std_na], axis=1)

        params = nn.get_parameters(surr)

        oldvar_na = cgt.square(oldstd_na)
        var_na = cgt.square(std_na)
        kl = (cgt.log(std_na / oldstd_na) +
              (oldvar_na + cgt.square(oldmean_na - mean_na)) / (2 * var_na) -
              .5).sum(axis=1).mean()

        lam = cgt.scalar()
        penobj = surr - lam * kl
        self._compute_surr_kl = cgt.function([oldpdist_np, o_no, a_na, adv_n],
                                             [surr, kl])
        self._compute_grad_lagrangian = cgt.function(
            [lam, oldpdist_np, o_no, a_na, adv_n],
            cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
        self.f_pdist = cgt.function([o_no], pdists_np)

        self.f_objs = cgt.function([oldpdist_np, o_no, a_na, adv_n],
                                   [surr, kl])

        self.pc = ParamCollection(params)
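
The kl expression above is the closed-form KL divergence between diagonal Gaussians, KL(old || new), summed over the action dimensions:

$$
\mathrm{KL}\bigl(\mathcal{N}(\mu_{\mathrm{old}}, \sigma_{\mathrm{old}}^2) \,\|\, \mathcal{N}(\mu, \sigma^2)\bigr)
= \sum_i \left[ \log\frac{\sigma_i}{\sigma_{\mathrm{old},i}}
+ \frac{\sigma_{\mathrm{old},i}^2 + (\mu_{\mathrm{old},i} - \mu_i)^2}{2\sigma_i^2}
- \frac{1}{2} \right]
$$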