def ntm_address(opt, wprev_bhn, M_bnm, k_bhm, beta_bh, g_bh, s_bh3, gamma_bh):

    # Content addressing

    # Cosine similarity: take the inner product along the memory axis, k * M
    numer_bhn = cgt.einsum("bhm,bnm->bhn", k_bhm, M_bnm)
    # compute the denominator |k| * |M|
    denom_bhn = cgt.broadcast("*",
        cgt.norm(k_bhm, axis=2, keepdims=True),                       # -> shape bh1
        cgt.norm(M_bnm, axis=2, keepdims=True).transpose([0, 2, 1]),  # -> bn1 -> b1n
        "xx1,x1x")
    csim_bhn = numer_bhn / denom_bhn
    assert infer_shape(csim_bhn) == (opt.b, 2 * opt.h, opt.n)
    # scale by beta and normalize
    tmp_bhn = cgt.broadcast("*", beta_bh[:, :, None], csim_bhn, "xx1,xxx")
    wc_bhn = sum_normalize2(cgt.exp(tmp_bhn))

    # Interpolation
    g_bh1 = g_bh[:, :, None]
    wg_bhn = cgt.broadcast("*", wprev_bhn, (1 - g_bh1), "xxx,xx1") \
           + cgt.broadcast("*", wc_bhn, g_bh1, "xxx,xx1")

    # Shift
    wtil_bhn = circ_conv_1d(wg_bhn, s_bh3, axis=2)

    # Sharpening
    wfin_bhn = sum_normalize2(
        cgt.broadcast("**", wtil_bhn, gamma_bh.reshape([opt.b, 2 * opt.h, 1]), "xxx,xx1"))

    b, h, n = opt.b, 2 * opt.h, opt.n
    assert infer_shape(wtil_bhn) == (b, h, n)
    assert infer_shape(gamma_bh) == (b, h)
    assert infer_shape(gamma_bh[:, :, None]) == (b, h, 1)

    return wfin_bhn
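# `infer_shape`, `sum_normalize2`, and `circ_conv_1d` are helpers defined
# elsewhere in the file. As a minimal sketch (an assumption, not the file's
# actual definition), `sum_normalize2` would renormalize along the last axis
# so that each head's weighting over the n memory slots sums to one:
def sum_normalize2(x_bhn):
    # divide each (batch, head) row by its sum over the memory axis
    return cgt.broadcast("/", x_bhn, cgt.sum(x_bhn, axis=2, keepdims=True), "xxx,xx1")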
def make_deep_rrnn(size_input, size_mem, n_layers, size_output, size_batch_in, k_in, k_h):
    # inputs[0] is the external input; inputs[1..n_layers] are the previous
    # hidden states, one per layer
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer + 1]  # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem
        size_batch = prev_h.shape[0]

        # learned reflection vectors for the hidden-to-hidden transform,
        # normalized to unit length row by row
        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True
        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        # input-dependent unit reflection vectors
        r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
        r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
        r_norm = cgt.norm(r_non, axis=2, keepdims=True)
        r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")

        prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
        inters_in = [prev_h_3]

        colon = slice(None, None, None)

        # apply 2*k_in input-dependent Householder reflections h <- (I - 2 r r^T) h
        for i in xrange(2 * k_in):
            inter_in = inters_in[-1]
            r_cur = cgt.subtensor(r, [colon, i, colon])
            r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
            r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
            ref_cur = cgt.batched_matmul(r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
            inter_out = inter_in - 2 * ref_cur
            inters_in.append(inter_out)

        h_in_rot = cgt.reshape(inters_in[-1], (size_batch, size_mem))
        inters_h = [h_in_rot]

        # apply 2*k_h learned Householder reflections h <- h (I - 2 r r^T)
        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = cgt.subtensor(xform_h, [i, colon])
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)

        next_h = inters_h[-1]
        outputs.append(next_h)

    category_activations = nn.Affine(size_mem, size_output, name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    return nn.Module(inputs, outputs)
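# A usage sketch for the factory above, with hypothetical sizes. The module
# maps [x, h_1, ..., h_L] to [h_1', ..., h_L', logprobs]. Note that
# size_batch_in is accepted but unused: the batch size is read off
# prev_h.shape[0] when the graph is built.
net = make_deep_rrnn(size_input=8, size_mem=16, n_layers=2,
                     size_output=10, size_batch_in=32, k_in=2, k_h=2)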
def make_deep_rrnn_rot_relu(size_input, size_mem, n_layers, size_output, size_batch_in, k_in, k_h):
    # inputs[0] is the external input; inputs[1..n_layers] are the previous
    # hidden states, one per layer
    inputs = [cgt.matrix() for i_layer in xrange(n_layers + 1)]
    outputs = []
    print 'input_size: ', size_input
    for i_layer in xrange(n_layers):
        prev_h = inputs[i_layer + 1]  # note that inputs[0] is the external input, so we add 1
        x = inputs[0] if i_layer == 0 else outputs[i_layer - 1]
        size_x = size_input if i_layer == 0 else size_mem
        size_batch = prev_h.shape[0]

        # learned unit reflection vectors for the hidden-to-hidden transform
        xform_h_param = nn.TensorParam((2 * k_h, size_mem), name="rotxform")
        xform_h_non = xform_h_param.weight
        xform_h_non.props["is_rotation"] = True
        xform_h_norm = cgt.norm(xform_h_non, axis=1, keepdims=True)
        xform_h = cgt.broadcast('/', xform_h_non, xform_h_norm, "xx,x1")

        # this variant injects the input additively through a ReLU rather
        # than via input-dependent reflections
        add_in_lin = nn.Affine(size_x, size_mem)(x)
        add_in_relu = nn.rectify(add_in_lin)

        prev_h_scaled = nn.scale_mag(prev_h)
        h_in_added = prev_h_scaled + add_in_relu

        inters_h = [h_in_added]

        # apply 2*k_h learned Householder reflections h <- h (I - 2 r r^T)
        for i in xrange(2 * k_h):
            inter_in = inters_h[-1]
            r_cur = xform_h[i, :]
            r_cur_2_transpose = cgt.reshape(r_cur, (size_mem, 1))
            r_cur_2 = cgt.reshape(r_cur, (1, size_mem))
            ref_cur = cgt.dot(cgt.dot(inter_in, r_cur_2_transpose), r_cur_2)
            inter_out = inter_in - 2 * ref_cur
            inters_h.append(inter_out)

        next_h = inters_h[-1]
        outputs.append(next_h)

    category_activations = nn.Affine(size_mem, size_output, name="pred")(outputs[-1])
    logprobs = nn.logsoftmax(category_activations)
    outputs.append(logprobs)

    return nn.Module(inputs, outputs)
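# Each inner-loop update h <- h - 2 (h r^T) r with a unit row vector r is the
# Householder reflection I - 2 r r^T, an orthogonal map, so the recurrent
# transition preserves the hidden state's norm. A quick numpy check of that
# identity:
import numpy as np
rng = np.random.RandomState(0)
h0 = rng.randn(5, 16)
h_chk = h0.copy()
for _ in range(4):
    r_chk = rng.randn(16)
    r_chk /= np.linalg.norm(r_chk)                      # unit reflection vector
    h_chk -= 2 * np.outer(h_chk.dot(r_chk), r_chk)      # apply I - 2 r r^T to each row
assert np.allclose(np.linalg.norm(h_chk, axis=1), np.linalg.norm(h0, axis=1))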
import cgt
from cgt import nn, utils
import numpy as np, numpy.random as nr
from numpy.linalg import norm
from param_collection import ParamCollection

k_in = 1
size_x = 3
size_mem = 4
size_batch = 4

x = cgt.matrix(fixed_shape=(size_batch, size_x))
prev_h = cgt.matrix(fixed_shape=(size_batch, size_mem))

# input-dependent unit reflection vectors
r_vec = nn.Affine(size_x, 2 * k_in * size_mem)(x)
r_non = cgt.reshape(r_vec, (size_batch, 2 * k_in, size_mem))
r_norm = cgt.norm(r_non, axis=2, keepdims=True)
r = cgt.broadcast('/', r_non, r_norm, "xxx,xx1")

prev_h_3 = cgt.reshape(prev_h, (size_batch, size_mem, 1))
inters = [prev_h_3]

for i in xrange(k_in * 2):
    inter_in = inters[-1]
    r_cur = r[:, i, :]
    r_cur_3_transpose = cgt.reshape(r_cur, (size_batch, 1, size_mem))
    r_cur_3 = cgt.reshape(r_cur, (size_batch, size_mem, 1))
    ref_cur = cgt.batched_matmul(r_cur_3, cgt.batched_matmul(r_cur_3_transpose, inter_in))
    # note: no factor of 2 here, so this subtracts the projection onto r_cur
    # (I - r r^T) rather than applying a full Householder reflection
    inter_out = inter_in - ref_cur
    inters.append(inter_out)

h = inters[-1]

# h depends on prev_h as well as x, so both must be module inputs
r_nn = nn.Module([x, prev_h], [h])
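# A minimal smoke test, assuming the standard CGT workflow of compiling the
# graph with cgt.function and that cgt.floatX names the configured float
# dtype (the Affine parameters keep the values they were given at
# construction); f_r and h_val are illustrative names:
f_r = cgt.function([x, prev_h], h)
h_val = f_r(nr.randn(size_batch, size_x).astype(cgt.floatX),
            nr.randn(size_batch, size_mem).astype(cgt.floatX))
print h_val.shape  # -> (size_batch, size_mem, 1)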