Example #1
def ntm_address(opt, wprev_bhn, M_bnm, k_bhm, beta_bh, g_bh, s_bh3, gamma_bh):

    # Content addressing

    # Cosine similarity
    # take inner product along memory axis k * M
    numer_bhn = cgt.einsum("bhm,bnm->bhn", k_bhm, M_bnm)
    # compute denominator |k| * |M|
    denom_bhn = cgt.broadcast(
        "*",
        cgt.norm(k_bhm, axis=2, keepdims=True),                       # -> shape bh1
        cgt.norm(M_bnm, axis=2, keepdims=True).transpose([0, 2, 1]),  # bn1 -> b1n
        "xx1,x1x")
    csim_bhn = numer_bhn / denom_bhn
    assert infer_shape(csim_bhn) == (opt.b, 2 * opt.h, opt.n)
    # scale by beta
    tmp_bhn = cgt.broadcast("*", beta_bh[:, :, None], csim_bhn, "xx1,xxx")
    wc_bhn = sum_normalize2(cgt.exp(tmp_bhn))
    # Interpolation
    g_bh1 = g_bh[:, :, None]
    wg_bhn = cgt.broadcast("*", wprev_bhn, (1 - g_bh1), "xxx,xx1") \
            + cgt.broadcast("*", wc_bhn, g_bh1, "xxx,xx1")
    # Shift
    wtil_bhn = circ_conv_1d(wg_bhn, s_bh3, axis=2)
    # Sharpening
    wfin_bhn = sum_normalize2(
        cgt.broadcast("**", wtil_bhn, gamma_bh.reshape([opt.b, 2 * opt.h, 1]),
                      "xxx,xx1"))

    b, h, n = opt.b, 2 * opt.h, opt.n
    assert infer_shape(wtil_bhn) == (b, h, n)
    assert infer_shape(gamma_bh) == (b, h)
    assert infer_shape(gamma_bh[:, :, None]) == (b, h, 1)
    return wfin_bhn
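The function above implements the four NTM addressing stages (content addressing, interpolation, shift, sharpening); sum_normalize2, circ_conv_1d, and infer_shape are project helpers not shown here. For reference, a minimal single-head NumPy sketch of the same pipeline (the 3-element shift kernel is assumed to be ordered [s(-1), s(0), s(1)]; this is an illustration, not the project's code):

import numpy as np

def ntm_address_np(wprev_n, M_nm, k_m, beta, g, s_3, gamma):
    # content addressing: softmax of beta * cosine_similarity(k, rows of M)
    csim_n = M_nm.dot(k_m) / (np.linalg.norm(k_m) * np.linalg.norm(M_nm, axis=1))
    wc_n = np.exp(beta * csim_n)
    wc_n /= wc_n.sum()
    # interpolation with the previous weighting
    wg_n = g * wc_n + (1 - g) * wprev_n
    # circular convolution with the shift kernel
    n = wg_n.shape[0]
    wtil_n = np.array([sum(wg_n[(i - j) % n] * s_3[j + 1] for j in (-1, 0, 1))
                       for i in range(n)])
    # sharpening, then renormalize to a distribution
    wfin_n = wtil_n ** gamma
    return wfin_n / wfin_n.sum()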
Example #2
File: rrnn.py Project: zobot/rrnn
def make_deep_rrnn(size_input, size_mem, n_layers, size_output, size_batch_in, k_in, k_h):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers+1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer+1] # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer==0 else outputs[i_layer-1]
        size_x = size_input if i_layer==0 else size_mem
        size_batch = prev_h.shape[0]

        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True
        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
        r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
        r_norm = cgt.norm(r_non, axis=2, keepdims=True)
        r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")
        prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
        inters_in = [prev_h_3]

        colon = slice(None, None, None)

        # apply 2*k_in input-dependent Householder reflections to the hidden
        # state: h' = (I - 2 r r^T) h, each r being a unit row of `r`
        for i in xrange(2 * k_in):
            inter_in = inters_in[-1]
            r_cur = cgt.subtensor(r, [colon, i, colon])
            r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
            r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
            # r (r^T h), computed per batch element
            ref_cur = cgt.batched_matmul(r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
            inter_out = inter_in - 2 * ref_cur
            inters_in.append(inter_out)

        h_in_rot = cgt.reshape(inters_in[-1], (size_batch, size_mem))
        inters_h = [h_in_rot]

        # apply 2*k_h shared (input-independent) reflections using rows of xform_h
        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)
        next_h = inters_h[-1]
        outputs.append(next_h)


    category_activations = nn.Affine(size_mem, size_output, name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    #print 'len outputs:', len(outputs)
    #print 'len inputs:', len(inputs)

    return nn.Module(inputs, outputs)
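Each loop iteration above applies a Householder reflection h' = (I - 2 r r^T) h with a unit vector r, which preserves the norm of the hidden state. A small NumPy check of that property (illustrative only):

import numpy as np

def householder_reflect(h, r):
    # h' = h - 2 (r . h) r  ==  (I - 2 r r^T) h, with r normalized to unit length
    r = r / np.linalg.norm(r)
    return h - 2.0 * np.dot(r, h) * r

h = np.random.randn(4)
h_ref = householder_reflect(h, np.random.randn(4))
assert np.isclose(np.linalg.norm(h), np.linalg.norm(h_ref))  # norm preserved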
Example #3
def make_deep_rrnn_rot_relu(size_input, size_mem, n_layers, size_output,
                            size_batch_in, k_in, k_h):
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer + 1]  # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem
        size_batch = prev_h.shape[0]

        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True

        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        add_in_lin = nn.Affine(size_x, size_mem)(x)
        add_in_relu = nn.rectify(add_in_lin)

        prev_h_scaled = nn.scale_mag(prev_h)

        h_in_added = prev_h_scaled + add_in_relu
        inters_h = [h_in_added]

        colon = slice(None, None, None)

        # apply 2*k_h shared Householder reflections, mirroring make_deep_rrnn
        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = xform_h[i, :]
            #r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)
        next_h = inters_h[-1]
        outputs.append(next_h)

    category_activations = nn.Affine(size_mem, size_output,
                                     name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    #print 'len outputs:', len(outputs)
    #print 'len inputs:', len(inputs)

    return nn.Module(inputs, outputs)
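Because the loop composes an even number (2 * k_h) of reflections, the overall hidden-to-hidden transform is a rotation: each reflection has determinant -1, so the product of an even count is orthogonal with determinant +1. A quick NumPy sanity check (illustrative):

import numpy as np

def householder(r):
    # H = I - 2 r r^T with unit r: orthogonal, det(H) = -1
    r = r / np.linalg.norm(r)
    return np.eye(len(r)) - 2.0 * np.outer(r, r)

R = householder(np.random.randn(4)).dot(householder(np.random.randn(4)))
assert np.allclose(R.dot(R.T), np.eye(4))   # orthogonal
assert np.isclose(np.linalg.det(R), 1.0)    # det +1, i.e. a rotation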
Example #4
import cgt
from cgt import nn, utils
import numpy as np, numpy.random as nr
from numpy.linalg import norm
from param_collection import ParamCollection

k_in = 1
size_x = 3
size_mem = 4
size_batch = 4

x = cgt.matrix(fixed_shape=(size_batch, size_x))
prev_h = cgt.matrix(fixed_shape=(size_batch, size_mem))
r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
r_norm = cgt.norm(r_non, axis=2, keepdims=True)
r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")
prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
inters = [prev_h_3]

for i in xrange(k_in * 2):
    inter_in = inters[-1]
    r_cur = r[:, i, :]
    r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
    r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
    ref_cur = cgt.batched_matmul(r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
    # note: subtracting ref_cur once projects out the component of inter_in
    # along r_cur; the reflection used in the rrnn layers subtracts 2 * ref_cur
    inter_out = inter_in - ref_cur
    inters.append(inter_out)
h = inters[-1]
    
r_nn = nn.Module([x], [h])
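To evaluate the module, the symbolic graph still has to be compiled. A minimal sketch, assuming cgt exposes a Theano-style cgt.function that compiles symbolic inputs/outputs into a callable (an assumption about the API, not confirmed by this listing):

# hypothetical usage sketch; cgt.function is assumed to exist
f = cgt.function([x, prev_h], h)
x_val = nr.randn(size_batch, size_x)
h_val = nr.randn(size_batch, size_mem)
print f(x_val, h_val).shape  # expect (size_batch, size_mem, 1)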