def propdown(self, hid):
    """Propagates the hidden units' activation downwards to the visible units.

    :param hid: Variable Matrix(batch_size, out_channels, image_height_out, image_width_out)
        - given h_sample
    :return: Variable Matrix(batch_size, in_channels, image_height, image_width)
        - probability for each visible unit to be v_j = 1
    """
    batch_size = hid.data.shape[0]
    if self.real == 0:
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        pre_sigmoid_activation = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize - 1)
        # = F.matmul(hid, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible))
        v_mean = F.sigmoid(pre_sigmoid_activation)
    else:
        # TODO: check
        W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
        v_mean = F.convolution_2d(hid, W_flipped, self.conv.a, pad=self.ksize - 1)
    return v_mean
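# Hedged sketch (not part of the model above): a minimal shape check of the idea
# used in propdown -- flipping the kernel spatially, swapping its in/out channel
# axes, and convolving with pad = ksize - 1 maps the hidden activation back to
# the visible shape. All shapes below are assumptions chosen for illustration.
import numpy as np
import chainer.functions as F

batch, in_ch, out_ch, size, ksize = 2, 1, 4, 8, 3
v = np.random.rand(batch, in_ch, size, size).astype(np.float32)
W = np.random.rand(out_ch, in_ch, ksize, ksize).astype(np.float32)
h = F.convolution_2d(v, W)                                           # (2, 4, 6, 6)
W_flipped = np.ascontiguousarray(np.swapaxes(W[:, :, ::-1, ::-1], 0, 1))  # (1, 4, 3, 3)
v_back = F.convolution_2d(h, W_flipped, pad=ksize - 1)               # (2, 1, 8, 8)
assert v_back.shape == v.shape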
def encode(self, x_input, x_query, answer):
    m = self.encode_input(x_input)
    u = self.encode_query(x_query)
    mu = functions.matmul(m, u, transb=True)
    p = functions.softmax(mu)
    c = self.encode_output(x_input)
    # Transpose the memory and take the weighted sum: (2, 50, 1)
    o = functions.matmul(functions.swapaxes(c, 1, 0), p)
    o = functions.swapaxes(o, 1, 0)  # (2, 50)
    predict = self.W(u + o)
    loss = functions.softmax_cross_entropy(predict, answer)
    return loss
def check_forward(self, x_data):
    axis1, axis2 = self.axis1, self.axis2
    x = chainer.Variable(x_data)
    y = functions.swapaxes(x, axis1, axis2)
    self.assertEqual(y.data.dtype, self.dtype)
    self.assertTrue(
        (self.x.swapaxes(axis1, axis2) == cuda.to_cpu(y.data)).all())
def __call__(self, src, is_train=False, xp=np):
    # Some namings
    B = len(src)      # batch size
    N = len(src[0])   # length of source
    H = self.H
    src_col = lambda x: Variable(self.xp.array([src[i][x] for i in range(B)], dtype=np.int32))
    embed = lambda e, x: e(self.IE(x), is_train=is_train)
    bi_rnn = lambda x, y: self.AE(F.concat((x[0], y[1]), axis=1))
    concat_source = lambda S, s: s if S is None else F.concat((S, s), axis=2)

    # State reset
    self.EF.reset_state()
    self.EB.reset_state()

    # Forward + backward encoding
    s = []
    for j in range(N):
        s.append((
            embed(self.EF, src_col(j)),
            embed(self.EB, src_col(-j - 1))
        ))

    # Joining the encoding data together
    S = None
    for j in range(N):
        s_j = bi_rnn(s[j], s[-j - 1])
        S = concat_source(S, F.reshape(s_j, (B, H, 1)))
    S = F.swapaxes(S, 1, 2)

    return S, s_j
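# Hedged sketch (shapes are assumptions, not taken from the model above): how
# reshaping each bidirectional state to (B, H, 1), concatenating along axis 2,
# and the final swapaxes yield a (batch, length, hidden) source encoding.
import numpy as np
from chainer import functions as F

B, N, H = 2, 5, 8
S = None
for j in range(N):
    s_j = np.random.rand(B, H).astype(np.float32)   # stand-in for a bi_rnn output
    s_j = F.reshape(s_j, (B, H, 1))
    S = s_j if S is None else F.concat((S, s_j), axis=2)   # (B, H, j + 1)
S = F.swapaxes(S, 1, 2)   # (B, H, N) -> (B, N, H)
assert S.shape == (B, N, H)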
def check_backward(self, x_data, y_grad):
    x = chainer.Variable(x_data)
    y = functions.swapaxes(x, self.axis1, self.axis2)
    y.grad = y_grad
    y.backward()

    func = y.creator
    f = lambda: func.forward((x.data.copy(),))
    gx, = gradient_check.numerical_grad(f, (x.data,), (y.grad,), eps=1e-5)

    gradient_check.assert_allclose(gx, x.grad, rtol=1e-5)
def query(self, u):
    xp = cuda.get_array_module(u)
    size = self.m.shape[1]
    inds = xp.arange(size - 1, -1, -1, dtype=numpy.int32)
    tm = self.TA(inds)
    tc = self.TC(inds)
    tm = F.broadcast_to(tm, self.m.shape)
    tc = F.broadcast_to(tc, self.c.shape)
    p = F.softmax(F.batch_matmul(self.m + tm, u))
    o = F.batch_matmul(F.swapaxes(self.c + tc, 2, 1), p)
    o = F.squeeze(o, -1)
    u = o + u
    return u
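# Hedged sketch (illustrative shapes only): the swapaxes + batch_matmul pattern
# above computes the attention-weighted sum over memory slots,
# o[b] = sum_i p[b, i] * c[b, i, :], verified here with plain NumPy.
import numpy as np

batch, slots, dim = 2, 5, 3
c = np.random.rand(batch, slots, dim).astype(np.float32)
p = np.random.rand(batch, slots, 1).astype(np.float32)
o = np.matmul(np.swapaxes(c, 2, 1), p)   # (batch, dim, 1)
weighted_sum = (c * p).sum(axis=1)       # (batch, dim)
assert np.allclose(o.squeeze(-1), weighted_sum)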
def __call__(self, xs):
    """Forward pass of a sentence.

    :param xs: a batch of sentences
    :return h: final hidden states
    """
    xs = self.embed(xs)
    xs = F.swapaxes(xs, 0, 1)  # (time, batch, embed)
    self.rnn.reset_state()
    for x in xs:
        h = self.rnn(x)
    h = F.tanh(self.linear(h))
    return h
def query(self, u):
    m = self.m
    c = self.c
    batch, size = m.data.shape[:2]
    xp = cuda.get_array_module(m.data)  # resolve xp locally (was undefined here)
    inds = chainer.Variable(xp.arange(size, dtype=numpy.int32)[::-1])
    tm = self.TA(inds)
    tc = self.TC(inds)
    tm = F.broadcast_to(tm, (batch,) + tm.data.shape)
    tc = F.broadcast_to(tc, (batch,) + tc.data.shape)
    p = F.softmax(F.batch_matmul(m + tm, u))
    o = F.batch_matmul(F.swapaxes(c + tc, 2, 1), p)
    o = F.reshape(o, (batch, m.data.shape[2]))
    u = o + u
    return u
def reconstruct(self, v):
    """
    :param v: Variable Matrix(batch_size, in_channels, image_height, image_width)
    :return: reconstructed_v, Variable Matrix(batch_size, in_channels, image_height, image_width)
    """
    batch_size = v.data.shape[0]
    xp = cuda.get_array_module(v.data)
    if self.real == 0:
        h = F.sigmoid(self.conv(v))
    else:
        std_ch = xp.reshape(self.std, (1, self.in_channels, 1, 1))
        h = F.sigmoid(self.conv(v / std_ch))
    # = F.sigmoid(F.matmul(v, self.l.W, transb=True) + F.broadcast_to(self.l.b, (batch_size, self.n_hidden)))
    W_flipped = F.swapaxes(CF.flip(self.conv.W, axes=(2, 3)), axis1=0, axis2=1)
    reconstructed_v = F.sigmoid(F.convolution_2d(h, W_flipped, self.conv.a, pad=self.ksize - 1))
    # = F.sigmoid(F.matmul(h, self.l.W) + F.broadcast_to(self.l.a, (batch_size, self.n_visible)))
    return reconstructed_v
def check_backward(self, x_data):
    x = chainer.Variable(x_data)
    y = functions.swapaxes(x, self.axis1, self.axis2)
    y.grad = y.data
    y.backward()
    gradient_check.assert_allclose(x.data, x.grad, atol=0, rtol=0)
def forward(self, inputs, devices):
    x, = inputs
    y = functions.swapaxes(x, self.axis1, self.axis2)
    return y,
def f(x):
    y = functions.swapaxes(x, self.axis1, self.axis2)
    return y * y
def f(x):
    return functions.swapaxes(x, self.axis1, self.axis2)
def __call__(self, x):
    return functions.swapaxes(x, self.axis1, self.axis2)
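# Hedged sketch: minimal usage of functions.swapaxes, the operation exercised by
# the tests and wrappers above; the axis values here are illustrative.
import numpy as np
from chainer import functions

x = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
y = functions.swapaxes(x, 1, 2)   # swap axes 1 and 2
assert y.shape == (2, 4, 3)
assert np.array_equal(y.data, np.swapaxes(x, 1, 2))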