def patchwise_loss(self, h, a, t):
    """Compute the patch-wise absolute-error loss and cache the outputs.

    The absolute difference between ``h`` and ``t`` (reshaped to a
    column) is summed, divided by ``self.n_patches`` and stored in
    ``self.loss``.  ``h`` and ``a`` are then split along axis 0 into
    ``self.n_images`` pieces (or wrapped in one-element lists when there
    is a single image) and stored in ``self.y`` and ``self.a``.
    """
    targets = F.reshape(t, (-1, 1))
    self.loss = F.sum(abs(h - targets))
    self.loss /= self.n_patches

    if self.n_images > 1:
        per_image_h = F.split_axis(h, self.n_images, 0)
        per_image_a = F.split_axis(a, self.n_images, 0)
    else:
        per_image_h, per_image_a = [h], [a]

    self.y = per_image_h
    self.a = per_image_a
def norm_vec_sentence_level(d, nn_flag=False, include_norm_term=False):
    """Normalize the rows of ``d`` sentence by sentence.

    ``d`` is split along axis 0 at the cumulative ``lengths``, padded to
    the longest sentence, passed through ``get_normalized_vector`` and
    then trimmed back to the original lengths and re-concatenated.

    ``lengths`` and ``batchsize`` are read from the enclosing scope.
    ``nn_flag`` and ``include_norm_term`` are accepted but not used here.
    """
    n_units = d.shape[1]
    longest = np.max(lengths)

    # Cut the flat input into one piece per sentence and pad them.
    per_sentence = F.split_axis(d, np.cumsum(lengths)[:-1], axis=0)
    padded = F.pad_sequence(per_sentence, length=longest, padding=0.0)

    # Normalize on the padded batch, then flatten back to rows.
    normalized = get_normalized_vector(padded, None)
    flat = F.reshape(normalized, (-1, n_units))

    # Every padded sentence occupies ``longest`` rows of the flat array.
    boundaries = np.cumsum(np.full(batchsize, longest))[:-1]
    chunks = F.split_axis(flat, boundaries, axis=0)

    # Drop the padding rows again before concatenating.
    trimmed = [chunk[:n_rows] for chunk, n_rows in zip(chunks, lengths)]
    return F.concat(trimmed, axis=0)
def forward(self, x, l, train, action):
    """Run one glimpse step and produce the next location.

    Args:
        x: Input passed to ``crop``.
        l: Current location variable; its ``data`` is mapped from
            [-1, 1] to integer pixel coordinates for cropping.
        train: When true, a location is sampled around the predicted
            mean and its ``ln_p`` term is returned.
        action: When true, the action head ``self.fc_ha`` is evaluated.

    Returns:
        ``(s, ln_p, y)`` when ``train`` is true, otherwise
        ``(l, None, y)``; ``y`` is ``None`` when ``action`` is false.
    """
    # The crop coordinates are computed with NumPy on the host.
    loc = l.data if self.xp == np else self.xp.asnumpy(l.data)

    margin = self.g_size/2
    loc = (loc+1)*0.5*(self.in_size-self.g_size+1) + margin
    loc = np.clip(loc, margin, self.in_size-margin)
    loc = np.floor(loc).astype(np.int32)

    # Retina encoding
    hx = F.relu(self.emb_x(crop(x, loc=loc, size=self.g_size)))
    # Location encoding
    hl = F.relu(self.emb_l(l))
    # Glimpse net
    g = F.relu(self.fc_lg(hl) + self.fc_xg(hx))
    # Core net: LSTM(g + h_t-1)
    h = self.core_lstm(g)
    # Location net
    l = F.tanh(self.fc_hl(h))

    if train:
        # Sample a location around the predicted mean.
        s = F.gaussian(mean=l, ln_var=self.ln_var)
        s = F.clip(s, -1., 1.)

        # Location policy term
        l1, l2 = F.split_axis(l, indices_or_sections=2, axis=1)
        s1, s2 = F.split_axis(s, indices_or_sections=2, axis=1)
        norm = (s1-l1)*(s1-l1) + (s2-l2)*(s2-l2)
        ln_p = F.reshape(0.5 * norm / self.var, (-1,))

    # Action net (optional)
    y = self.fc_ha(h) if action else None

    if train:
        return s, ln_p, y
    return l, None, y
def weighted_loss(self, h, a, t):
    """Compute the weighted per-image absolute-error loss.

    ``h``, ``a`` and ``t`` are split along axis 0 into ``self.n_images``
    pieces (or wrapped in one-element lists for a single image).  For
    each image the prediction is ``sum(h * a) / sum(a)`` over axis 0, and
    its absolute difference to the reshaped target is accumulated.  The
    mean over images is stored in ``self.loss``; the split ``h`` and
    ``a`` are stored in ``self.y`` and ``self.a``.
    """
    if self.n_images > 1:
        hs = F.split_axis(h, self.n_images, 0)
        weights = F.split_axis(a, self.n_images, 0)
        targets = F.split_axis(t, self.n_images, 0)
    else:
        hs, weights, targets = [h], [a], [t]

    total = 0
    for idx in range(self.n_images):
        pooled = F.sum(hs[idx] * weights[idx], 0) / F.sum(weights[idx], 0)
        total += abs(pooled - F.reshape(targets[idx], (1,)))
    total /= self.n_images

    self.loss = total
    self.y = hs
    self.a = weights
def shake_camera(img):
    """Randomly translate a 4-D image batch by up to 3 pixels per axis.

    Axis 2 and then axis 3 are each zero-padded by 4 on both sides, and a
    window of the original size is cut out at a random offset drawn from
    ``np.random.randint(1, 8)``.  The result has the same shape as the
    input.

    The window size is taken from the input shape itself rather than from
    a global image width, so non-square inputs are handled as well.

    Args:
        img: Variable whose ``data`` has four axes.

    Returns:
        The shifted variable, with the same shape as ``img``.
    """
    s0, s1, s2, s3 = img.data.shape

    # Shift along axis 2.
    zerobar = Variable(xp.zeros((s0, s1, 4, s3), dtype=np.float32))
    img = F.concat([zerobar, img, zerobar], axis=2)
    randshift = np.random.randint(1, 8)
    img = F.split_axis(img, [randshift, randshift + s2], axis=2)[1]

    # Shift along axis 3.
    # NOTE(review): the trailing singleton axis is kept from the original
    # implementation; presumably a workaround -- confirm whether it is
    # still needed.
    zerobar = Variable(xp.zeros((s0, s1, s2, 4, 1), dtype=np.float32))
    img = F.reshape(img, (s0, s1, s2, s3, 1))
    img = F.concat([zerobar, img, zerobar], axis=3)
    randshift = np.random.randint(1, 8)
    img = F.split_axis(img, [randshift, randshift + s3], axis=3)[1]

    return F.reshape(img, (s0, s1, s2, s3))
def compute_vecs(self, word_ids, word_boundaries, phrase_num, char_vecs=None):
    """Encode phrases by convolving word embeddings and max-pooling.

    The words are embedded (and concatenated with ``char_vecs`` along
    axis 1 when ``self.word_level_flag`` is set and ``char_vecs`` is
    given), passed through ``self.conv`` and split along the length axis
    at ``word_boundaries``.  Every odd-indexed segment is max-pooled and
    the pooled vectors are returned reshaped to
    ``(phrase_num, self.hidden_dim)``.
    """
    ids = my_variable(word_ids, volatile=not self.train)
    word_embs = self.emb(ids)  # total_len x dim

    if self.word_level_flag and char_vecs is not None:
        word_embs = F.concat([word_embs, char_vecs], axis=1)
        width = self.emb_dim + self.add_dim
    else:
        width = self.emb_dim

    # 1 x 1 x total_len x width
    conv_in = F.reshape(word_embs, (1, 1, -1, width))

    # Convolution, flattened to hidden_dim x length
    conv_out = F.reshape(self.conv(conv_in), (self.hidden_dim, -1))

    # Max over every odd-indexed segment
    segments = F.split_axis(conv_out, word_boundaries, axis=1)
    pooled = [F.max(segment, axis=1) for segment in segments[1::2]]

    return F.reshape(F.concat(pooled, axis=0), (phrase_num, self.hidden_dim))
def translate(self, xs, max_length=100):
    """Greedily decode a batch of source sequences.

    Each source sequence is reversed, embedded and encoded.  Decoding
    starts from ``EOS`` and picks the arg-max token for ``max_length``
    steps.  Each output is cut before its first ``EOS``.

    Args:
        xs: List of source id sequences.
        max_length (int): Number of decoding steps.

    Returns:
        list: One host array of token ids per source sequence.
    """
    n_sentences = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        reversed_xs = [x[::-1] for x in xs]
        h, c, _ = self.encoder(
            None, None, sequence_embed(self.embed_x, reversed_xs))

        ys = self.xp.full(n_sentences, EOS, 'i')
        steps = []
        for _step in range(max_length):
            eys = F.split_axis(self.embed_y(ys), n_sentences, 0)
            h, c, ys = self.decoder(h, c, eys)
            wy = self.W(F.concat(ys, axis=0))
            ys = self.xp.argmax(wy.data, axis=1).astype('i')
            steps.append(ys)

    # One row per sentence, one column per step.
    decoded = cuda.to_cpu(self.xp.stack(steps).T)

    # Remove EOS tags
    outs = []
    for row in decoded:
        eos_positions = numpy.argwhere(row == EOS)
        if len(eos_positions) > 0:
            row = row[:eos_positions[0, 0]]
        outs.append(row)
    return outs
def sequence_embed(embed, xs, dropout=0.):
    """Embed a list of variable-length sequences in one shot.

    The result is equivalent to
    ``[F.dropout(embed(x), ratio=dropout) for x in xs]``, but the
    sequences are concatenated first so ``embed`` and ``F.dropout`` are
    each called only once.

    Args:
        embed (callable): Embedding function or link applied to the
            concatenated ids.
        xs (list): Sequences of ids; the i-th element has length ``L_i``.
        dropout (float): Dropout ratio.

    Returns:
        Output variables, one per input sequence, obtained by splitting
        the embedded array along axis 0 at the cumulative lengths.
    """
    lengths = [len(x) for x in xs]
    boundaries = numpy.cumsum(lengths[:-1])
    flat = F.dropout(embed(F.concat(xs, axis=0)), ratio=dropout)
    return F.split_axis(flat, boundaries, 0)
def transform(self, images, normalized=False):
    '''Transform image data to latent space.

    Parameters
    ----------
    images : array-like shape (n_images, image_width, image_height,
                               n_colors)
        Input numpy array of images.
    normalized [optional] : bool
        When false, the pixel data is divided by 255 before encoding.

    Returns
    -------
    latent_vec : array-like shape (n_images, latent_dim)
        Samples drawn around the encoded mean.
    '''
    n_images = images.shape[0]
    flat = images.flatten().reshape((n_images, -1))

    x_encoding = chainer.Variable(flat)
    if not normalized:
        x_encoding /= 255.

    # The encoder output is halved along axis 1; the second half is used
    # as a log-variance below.
    mean, std = F.split_axis(self._encode(x_encoding), 2, 1)

    # Draw N(0, 1) noise with the shape of the mean.
    noise = np.random.standard_normal(mean.data.shape).astype('float32')
    if self.flag_gpu:
        noise = cuda.to_gpu(noise)
    noise = chainer.Variable(noise)

    # Scale and shift the noise with the encoded parameters.
    latent = noise * F.exp(0.5*std) + mean
    return latent.data
def check_forward(self, x_data, ys_data, indices_or_sections, axis):
    """Check that ``split_axis`` returns exactly the expected pieces.

    Every output must have dtype ``self.dtype``, a tuple shape, and data
    equal to the matching entry of ``ys_data`` with zero tolerance.
    """
    pieces = functions.split_axis(
        chainer.Variable(x_data), indices_or_sections, axis)
    for expected, actual in zip(ys_data, pieces):
        self.assertEqual(actual.data.dtype, self.dtype)
        self.assertIsInstance(actual.data.shape, tuple)
        gradient_check.assert_allclose(
            expected, actual.data, atol=0, rtol=0)
def encode_query(self, x_datas, i2sD, train=True):
    """Encode a query with forward and backward LSTMs.

    Tokens are embedded, dropped out and split into one vector per
    token.  Positions listed in ``i2sD`` are replaced by
    ``self.W_dxQ`` applied to the stored value.  Both LSTMs are read out
    at the position of ``self.PH_id`` and again at the end of their
    pass; the four states are concatenated along axis 1.

    Returns:
        tuple: ``(self.W_hu(concat_h), self.W_hq(concat_h))``.
    """
    token_ids = chainer.Variable(
        self.xp.array(x_datas, dtype=np.int32), volatile=not train)
    embedded = F.dropout(
        self.embed(token_ids), ratio=self.dropout_ratio, train=train)
    h0L = list(F.split_axis(embedded, len(x_datas), axis=0))

    for position, entity_repr in i2sD.items():
        h0L[position] = self.W_dxQ(entity_repr)

    placeholder_idx = x_datas.index(self.PH_id)

    # Forward pass: up to and including the placeholder, then the rest.
    self.Q_f_LSTM.reset_state()
    for token_vec in h0L[:placeholder_idx+1]:
        state = self.Q_f_LSTM(token_vec)
    forward_out = state
    for token_vec in h0L[placeholder_idx+1:]:
        state = self.Q_f_LSTM(token_vec)
    forward_endout = state

    # Backward pass: from the end down to the placeholder, then the rest.
    self.Q_b_LSTM.reset_state()
    for token_vec in reversed(h0L[placeholder_idx:]):
        state = self.Q_b_LSTM(token_vec)
    backward_out = state
    for token_vec in reversed(h0L[:placeholder_idx]):
        state = self.Q_b_LSTM(token_vec)
    backward_endout = state

    concat_h = F.concat(
        [forward_out, backward_out, forward_endout, backward_endout],
        axis=1)
    return self.W_hu(concat_h), self.W_hq(concat_h)
def encode_tokens(self, x_datas, i2sD, train=True):
    """Run forward and backward LSTMs over a token sequence.

    Tokens are embedded, dropped out and split into one vector per token
    (batch size 1).  Positions listed in ``i2sD`` are replaced by
    ``self.W_dx`` applied to the stored value.

    Returns:
        tuple: ``(forward_outL, backward_outL)``; the backward list is in
        processing order, i.e. starting from the last token.
    """
    token_ids = chainer.Variable(
        self.xp.array(x_datas, dtype=np.int32), volatile=not train)
    embedded = F.dropout(
        self.embed(token_ids), ratio=self.dropout_ratio, train=train)
    h0L = list(F.split_axis(embedded, len(x_datas), axis=0))

    # Replace embeddings with the dynamic entity representations.
    for position, entity_repr in i2sD.items():
        h0L[position] = self.W_dx(entity_repr)

    # LSTM, forward order
    self.f_LSTM.reset_state()
    forward_outL = [self.f_LSTM(token_vec) for token_vec in h0L]

    # LSTM, backward order
    self.b_LSTM.reset_state()
    backward_outL = [self.b_LSTM(token_vec) for token_vec in reversed(h0L)]

    return forward_outL, backward_outL
def check_forward(self, x1_data, x2_data, x3_data):
    """Call the link three times and compare against ``functions.lstm``.

    The link is stateful, so the order of the calls below matters: each
    expected value is built from the state left by the previous call.
    """
    xp = self.link.xp
    x1 = chainer.Variable(x1_data) if self.input_variable else x1_data
    # First call: the expected value starts from an all-zero cell state.
    h1 = self.link(x1)
    with cuda.get_device_from_array(x1_data):
        c0 = chainer.Variable(xp.zeros((len(self.x1), self.out_size), dtype=self.x1.dtype))
        c1_expect, h1_expect = functions.lstm(c0, self.link.upward(x1))
    testing.assert_allclose(h1.data, h1_expect.data)
    testing.assert_allclose(self.link.h.data, h1_expect.data)
    testing.assert_allclose(self.link.c.data, c1_expect.data)
    # Second call: only the first ``batch`` rows of the hidden state are
    # fed back; the remaining rows must stay untouched.
    batch = len(x2_data)
    x2 = chainer.Variable(x2_data) if self.input_variable else x2_data
    h1_in, h1_rest = functions.split_axis( self.link.h.data, [batch], axis=0)
    y2 = self.link(x2)
    with cuda.get_device_from_array(x1):
        c2_expect, y2_expect = \
            functions.lstm(c1_expect, self.link.upward(x2) + self.link.lateral(h1_in))
    testing.assert_allclose(y2.data, y2_expect.data)
    testing.assert_allclose(self.link.h.data[:batch], y2_expect.data)
    testing.assert_allclose(self.link.h.data[batch:], h1_rest.data)
    # Third call: the hidden state is captured before the call and
    # compared with the state afterwards.
    x3 = chainer.Variable(x3_data) if self.input_variable else x3_data
    h2_rest = self.link.h
    y3 = self.link(x3)
    c3_expect, y3_expect = \
        functions.lstm(c2_expect, self.link.upward(x3))
    testing.assert_allclose(y3.data, y3_expect.data)
    testing.assert_allclose(self.link.h.data, h2_rest.data)
def translate(self, xs, max_length=100):
    """Greedily decode a batch of source sequences.

    Each source sequence is reversed, embedded and encoded.  Decoding
    starts from ``EOS`` and picks the arg-max token for ``max_length``
    steps.  The result is sent to the NumPy device and each output is
    cut before its first ``EOS``.

    Args:
        xs: List of source id sequences.
        max_length (int): Number of decoding steps.

    Returns:
        list: One array of token ids per source sequence.
    """
    batch = len(xs)
    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        exs = sequence_embed(self.embed_x, [x[::-1] for x in xs])
        h, c, _ = self.encoder(None, None, exs)

        ys = self.xp.full(batch, EOS, numpy.int32)
        per_step = []
        for _step in range(max_length):
            eys = F.split_axis(self.embed_y(ys), batch, 0)
            h, c, ys = self.decoder(h, c, eys)
            wy = self.W(F.concat(ys, axis=0))
            ys = self.xp.argmax(wy.array, axis=1).astype(numpy.int32)
            per_step.append(ys)

    # Using `xp.concatenate(...)` instead of `xp.stack(result)` here to
    # support NumPy 1.9.
    stacked = self.xp.concatenate([step[None, :] for step in per_step]).T
    result = chainer.get_device(numpy).send(stacked)

    # Remove EOS tags
    outs = []
    for y in result:
        inds = numpy.argwhere(y == EOS)
        outs.append(y[:inds[0, 0]] if len(inds) > 0 else y)
    return outs
def embed_seq_batch(embed, seq_batch, dropout=0., norm_vecs_one=False):
    """Embed a batch of sequences with a single call to ``embed``.

    The sequences are concatenated along axis 0, embedded, dropped out
    and (optionally) normalized with ``get_normalized_vector``.  The
    result is then split back into one piece per input sequence.

    The split points are the cumulative sequence lengths, so sequences
    of different lengths are returned with their own number of rows.  For
    equal-length sequences this gives the same pieces as splitting into
    ``len(seq_batch)`` equal sections.

    Args:
        embed (callable): Embedding function applied to the concatenated
            ids.
        seq_batch (list): Sequences of ids.
        dropout (float): Dropout ratio.
        norm_vecs_one (bool): When true, the embedded vectors are passed
            through ``get_normalized_vector``.

    Returns:
        Tuple with one embedded variable per input sequence.
    """
    # End offset of every sequence except the last one.
    sections = []
    offset = 0
    for seq in seq_batch[:-1]:
        offset += len(seq)
        sections.append(offset)

    embs = F.dropout(embed(F.concat(seq_batch, axis=0)), ratio=dropout)
    if norm_vecs_one:
        embs = get_normalized_vector(embs, None)

    # [(len, ), ] x batchsize
    return F.split_axis(embs, sections, axis=0)
def check_backward(self, x_data, indices_or_sections, axis):
    """Check that back-propagating the outputs' own data restores ``x``.

    Every output's gradient is set to its data; after ``backward`` the
    input gradient must equal the input data with zero tolerance.
    """
    x = chainer.Variable(x_data)
    outputs = functions.split_axis(x, indices_or_sections, axis)
    for out in outputs:
        out.grad = out.data
    outputs[0].backward()

    gradient_check.assert_allclose(x.data, x.grad, atol=0, rtol=0)
def reset(self, img_feats):
    """Turn a batch of image features into LSTM states and outputs.

    The features are embedded with ``self.embed_img``, split into one
    piece per row along axis 0 and fed to ``self.lstm`` with no initial
    states.

    Returns:
        tuple: ``(hx, cx, ys)`` as returned by ``self.lstm``.
    """
    embedded = self.embed_img(img_feats)
    per_sample = F.split_axis(embedded, embedded.shape[0], axis=0)
    hidden, cell, outputs = self.lstm(None, None, per_sample)
    return hidden, cell, outputs
def crop(inputs, outsize, offset):
    """Crop ``inputs`` to ``outsize`` along every axis whose size differs.

    ``offset`` holds one start index per *cropped* axis, in axis order;
    axes whose size already matches ``outsize`` do not consume an entry.
    """
    x = F.identity(inputs)
    n_cropped = 0
    for axis, (in_len, out_len) in enumerate(
            zip(inputs.data.shape, outsize)):
        if in_len == out_len:
            continue
        start = offset[n_cropped]
        # Keep only the middle piece [start, start + out_len).
        _, x, _ = F.split_axis(x, [start, start + out_len], axis)
        n_cropped += 1
    return x
def check_backward(self, x_data, indices_or_sections, axis):
    """Check the input gradient when only the first output has a gradient.

    The first output's gradient is set to its data; the expected input
    gradient is ``[1, 0]`` with zero tolerance.
    """
    x = chainer.Variable(x_data)
    ys = functions.split_axis(x, indices_or_sections, axis)

    # Only the first output receives a gradient.
    first = ys[0]
    first.grad = first.data
    first.backward()

    expected_gx = numpy.array([1, 0])
    gradient_check.assert_allclose(expected_gx, x.grad, atol=0, rtol=0)
def encode(self, data, test=False):
    """Encode ``data`` and draw a latent sample.

    The encoder output is halved along axis 1 into ``mean`` and
    ``ln_var``; the sample is ``noise * exp(0.5 * ln_var) + mean`` with
    standard-normal noise.

    Returns:
        tuple: ``(z, mean, ln_var)``.
    """
    mean, ln_var = F.split_axis(self.enc(data, test=test), 2, 1)

    noise = Variable(
        np.random.standard_normal(mean.data.shape).astype('float32'))
    if self.flag_gpu:
        noise.to_gpu()

    z = noise * F.exp(0.5*ln_var) + mean
    return z, mean, ln_var
def encode(self, bow):
    """Convert a bag-of-words batch into a latent sample and its KL loss.

    ``bow`` goes through two ReLU layers; the output of
    ``self.mu_logsigma`` is halved along axis 1 into the mean and the
    log-variance passed to ``F.gaussian`` and
    ``F.gaussian_kl_divergence``.

    Returns:
        tuple: ``(sample, loss)``.
    """
    hidden = F.relu(self.l2(F.relu(self.l1(bow))))
    mu, log_sigma = F.split_axis(self.mu_logsigma(hidden), 2, 1)
    sample = F.gaussian(mu, log_sigma)
    kl_loss = F.gaussian_kl_divergence(mu, log_sigma)
    return sample, kl_loss
def check_forward_single(self, inputs, backend_config):
    """Check that a single-section split returns a bare ``Variable``.

    With ``force_tuple=False`` and one section, ``split_axis`` must
    return a ``chainer.Variable`` rather than a tuple.

    The array is taken from the ``inputs`` argument, so that the copy
    moved to the GPU is the one actually used when
    ``backend_config.use_cuda`` is set.
    """
    if backend_config.use_cuda:
        inputs = cuda.to_gpu(inputs)

    x, = inputs
    x = chainer.Variable(x)

    with backend_config:
        ys = functions.split_axis(x, 1, self.axis, force_tuple=False)

    assert isinstance(ys, chainer.Variable)
def __call__(self, x):
    """Apply multi-head attention to ``x``.

    ``self.c_attn`` produces a projection that is split into three equal
    parts along axis 2 (query, key, value).  The heads are split,
    attended, merged, projected with ``self.c_proj`` and passed through
    ``self.resid_dropout``.
    """
    qkv = self.c_attn(x)
    query, key, value = F.split_axis(qkv, 3, axis=2)

    attended = self._attn(
        self.split_heads(query),
        self.split_heads(key, k=True),
        self.split_heads(value))

    merged = self.merge_heads(attended)
    return self.resid_dropout(self.c_proj(merged))
def forward(self, inputs, device):
    """Split ``x`` into one section and return the result as a tuple.

    With ``self.force_tuple`` the function must return a one-element
    tuple, which is returned as is.  Otherwise it must return a bare
    ``chainer.Variable``, which is wrapped in a tuple here.
    """
    x, = inputs
    ret = functions.split_axis(
        x, 1, self.axis, force_tuple=self.force_tuple)

    if not self.force_tuple:
        assert isinstance(ret, chainer.Variable)
        return ret,

    assert isinstance(ret, tuple)
    assert len(ret) == 1
    return ret
def __call__(self, x):
    """Feed slices of ``x`` to the classifiers and the rest to the segmenter.

    ``x`` is split along axis 1 at the running totals of the classifiers'
    ``n_input``.  The last piece goes to ``self.segmenter``; the other
    pieces go to the classifiers in order.  Without classifiers, ``x``
    itself goes to the segmenter.

    Returns:
        tuple: ``(y, zs)`` where ``zs`` is a tuple of classifier outputs.
    """
    boundaries = list(itertools.accumulate(
        clf.n_input for clf in self.classifiers
    ))
    xs = cf.split_axis(x, boundaries, 1) if boundaries else [x]

    y = self.segmenter(xs[-1])
    zs = tuple(
        clf(piece) for piece, clf in zip(xs[:-1], self.classifiers))
    return y, zs
def __call__(self, x, condition):
    """Apply one gated residual block.

    The convolution output is cropped to the input length along axis 2,
    ``condition`` is added, and the result is halved along axis 1 into
    the tanh and sigmoid branches of the gate.

    Returns:
        tuple: ``(residual, skip)``.  The residual adds ``x`` when the
        lengths match, otherwise only the last step of ``x``.
    """
    length = x.shape[2]
    h = self.conv(x)[:, :, :length]  # crop
    h += condition

    filter_in, gate_in = F.split_axis(h, 2, axis=1)
    z = F.tanh(filter_in) * F.sigmoid(gate_in)

    if x.shape[2] == z.shape[2]:
        shortcut = x
    else:
        shortcut = x[:, :, -1:]  # crop
    residual = self.res(z) + shortcut

    skip_connection = self.skip(z)
    return residual, skip_connection
def __call__(self, x, t):
    """Run the network with one output channel group inhibited.

    After the convolution/pooling stack and the three ``fc`` layers, the
    output is split into three parts along axis 1.  Part ``self.c`` is
    replaced by its batch matrix product with zeros before the parts are
    concatenated again into ``self.pred``.

    Returns:
        The loss (softmax cross entropy divided by ``16 * 16``) when
        ``t`` is given, otherwise the softmax of the prediction.
    """
    conv_blocks = (
        (self.conv1_1, self.conv1_2),
        (self.conv2_1, self.conv2_2),
        (self.conv3_1, self.conv3_2, self.conv3_3),
        (self.conv4_1, self.conv4_2, self.conv4_3),
        (self.conv5_1, self.conv5_2, self.conv5_3),
    )
    h = x
    for block in conv_blocks:
        for conv in block:
            h = F.relu(conv(h))
        h = F.max_pooling_2d(h, 2, stride=2)

    h = F.relu(self.fc6(h))
    h = F.relu(self.fc7(h))
    h = self.fc8(h)

    # Channel-wise inhibition
    parts = F.split_axis(h, 3, 1)
    n_samples = x.data.shape[0]
    c = F.reshape(parts[self.c], (n_samples, 16, 16))
    xp = cuda.get_array_module(x.data)
    z = Variable(xp.zeros_like(c.data), volatile=t is None)
    c = F.batch_matmul(c, z)
    c = F.reshape(c, (n_samples, 1, 16, 16))

    hs = [c if i == self.c else part for i, part in enumerate(parts)]
    self.pred = F.concat(hs, 1)

    if t is None:
        self.pred = F.softmax(self.pred)
        return self.pred

    self.loss = F.softmax_cross_entropy(self.pred, t)
    self.loss /= 16 * 16
    return self.loss
def check_backward(self, inputs, backend_config):
    """Check the input gradient when only the first output has a gradient.

    The first output's gradient is set to its data; the expected input
    gradient is ``[1, 0]`` with zero tolerance.
    """
    if backend_config.use_cuda:
        inputs = cuda.to_gpu(inputs)
    (array,) = inputs
    x = chainer.Variable(array)

    with backend_config:
        ys = functions.split_axis(x, self.ys_section, self.axis)

    # Only the first output receives a gradient.
    first = ys[0]
    first.grad = first.data
    first.backward()

    expected_gx = numpy.array([1, 0])
    testing.assert_allclose(expected_gx, x.grad, atol=0, rtol=0)
def check_backward(self, inputs, backend_config):
    """Check that back-propagating the outputs' own data restores ``x``.

    Every output's gradient is set to its data; after ``backward`` the
    input gradient must equal the input data with zero tolerance.
    """
    if backend_config.use_cuda:
        inputs = cuda.to_gpu(inputs)
    (array,) = inputs
    x = chainer.Variable(array)

    with backend_config:
        outputs = functions.split_axis(
            x, self.ys_section, self.axis, force_tuple=True)

    for out in outputs:
        out.grad = out.data
    outputs[0].backward()

    testing.assert_allclose(x.data, x.grad, atol=0, rtol=0)
def check_forward(self, inputs, backend_config):
    """Check that ``split_axis`` returns exactly ``self.ys_expected``.

    Every output must have dtype ``self.dtype``, a tuple shape, and data
    equal to the expected piece with zero tolerance.
    """
    if backend_config.use_cuda:
        inputs = cuda.to_gpu(inputs)
    (array,) = inputs
    x = chainer.Variable(array)

    with backend_config:
        outputs = functions.split_axis(
            x, self.ys_section, self.axis, force_tuple=True)

    for expected, actual in zip(self.ys_expected, outputs):
        assert actual.data.dtype == self.dtype
        assert isinstance(actual.data.shape, tuple)
        testing.assert_allclose(expected, actual.data, atol=0, rtol=0)
def onestep(self, ys, hx, cx, oxs, hts):
    """Advance the decoder by one step with attention.

    The embedded tokens (concatenated with the previous ``hts`` when
    ``self.feeding`` is set) are fed to ``self.rnn``.  The RNN outputs
    are attended over ``oxs``, combined through ``self.wc`` and ``tanh``
    and projected with ``self.wo``.

    Returns:
        tuple: ``(hy, cy, oys, hts)``.
    """
    bs = len(ys)
    emb_ys = self.emb(ys)

    if self.feeding:
        fed = F.concat(
            (F.expand_dims(emb_ys, axis=1), F.stack(hts)), axis=2)
        rnn_inputs = F.separate(fed)
    else:
        rnn_inputs = F.split_axis(emb_ys, bs, 0)
    hy, cy, oys = self.rnn(hx, cx, rnn_inputs)

    oys = F.stack(oys)
    oxs = F.stack(oxs)
    cts = self.attn(oys, oxs)

    combined = F.concat((oys, cts), axis=2)
    hts = F.tanh(F.stack(sequence_linear(self.wc, combined)))
    oys = self.wo(F.concat(hts, axis=0))
    return hy, cy, oys, hts
def __call__(self, prev_hg, prev_cg, prev_z, v, r, prev_u):
    """Run one LSTM step and update the upsampled state.

    ``v`` is broadcast (and ``r`` too when its axis 2 has size 1), the
    inputs are concatenated along axis 1 and passed through
    ``self.lstm``.  The result is split into four equal parts along axis
    1: forget gate, input gate, candidate and output gate.

    Returns:
        tuple: ``(next_h, next_c, next_u)``.
    """
    v = self.broadcast_v(v)
    if r.shape[2] == 1:
        r = self.broadcast_r(r)

    gates = self.lstm(cf.concat((prev_hg, v, r, prev_z), axis=1))
    forget_in, input_in, cand_in, output_in = cf.split_axis(
        gates, 4, axis=1)

    next_c = (cf.sigmoid(forget_in) * prev_cg
              + cf.sigmoid(input_in) * cf.tanh(cand_in))
    next_h = cf.sigmoid(output_in) * cf.tanh(next_c)
    next_u = self.upsample_h(next_h) + prev_u
    return next_h, next_c, next_u
def __call__(self, ws, ss, ps, ts):
    """Compute the summed tagging loss and report loss and accuracy.

    xs [(w,s,p,y), ..., ]
    w: word, s: suffix, p: prefix, y: label

    The first and last outputs of ``self.forward`` are dropped.  ``ts``
    is transposed and split into one target vector per position.  The
    loss is the sum over positions; the reported accuracy is the mean
    over positions.
    """
    batchsize, length = ts.shape
    ys = self.forward(ws, ss, ps)[1:-1]
    ts = [
        F.squeeze(column, 0)
        for column in F.split_axis(F.transpose(ts), length, 0)
    ]

    def add(x, y):
        return x + y

    losses = [F.softmax_cross_entropy(y, t) for y, t in zip(ys, ts)]
    loss = reduce(add, losses)

    accuracies = [
        F.accuracy(y, t, ignore_label=IGNORE) for y, t in zip(ys, ts)]
    acc = reduce(add, accuracies)
    acc /= length

    chainer.report({"loss": loss, "accuracy": acc}, self)
    return loss
def get_scores(self,candidates,links,relations,edges,xp,mode,RC,EC):
    """Score candidate ``(h, r, t, l)`` tuples.

    The context vectors of all entities occurring as head or tail are
    computed once.  The score of a candidate is the squared L2 norm of
    ``head - tail + relation``, where the relation embedding goes
    through ``tanh`` when ``self.is_bound_wr`` is set.
    """
    entities = set()
    for head, _, tail, _ in candidates:
        entities.add(head)
        entities.add(tail)
    entities = list(entities)

    xe = self.get_context(entities, links, relations, edges, 0, xp, RC, EC)
    xe = F.split_axis(xe, len(entities), axis=0)
    edict = dict(zip(entities, xe))

    diffs, rels = [], []
    for head, rel, tail, _ in candidates:
        rels.append(rel)
        diffs.append(edict[head] - edict[tail])
    diffs = F.concat(diffs, axis=0)

    xr = self.embedR(xp.array(rels, 'i'))
    if self.is_bound_wr:
        xr = F.tanh(xr)
    return F.batch_l2_norm_squared(diffs + xr)
def get_test_weight(self, in_size=None):
    """Return the weights for testing, built from ``self.mu`` only.

    No noise is generated; the current configuration is used.

    Args:
        in_size (int): Input size; only needed while ``self.in_size`` is
            still ``None``.

    Returns:
        tuple: ``(weight, bias)``.  Without bias, ``self.mu`` is reshaped
        to ``(out_size, in_size)`` and the bias is ``None``.  With bias,
        the first ``out_size * (in_size - 1)`` entries form the weight
        and the remaining entries form the bias.

    Raises:
        ValueError: If neither ``self.in_size`` nor ``in_size`` is set.
    """
    if self.in_size is None:
        if in_size is None:
            raise ValueError("in_size should not be none for test weights")
        self.in_size = in_size

    if self.no_bias:
        return F.reshape(self.mu, (self.out_size, self.in_size)), None

    n_weight_cols = self.in_size - 1
    w, b = F.split_axis(self.mu, [self.out_size * n_weight_cols], axis=0)
    return F.reshape(w, (self.out_size, n_weight_cols)), b
def forward(self, x, index_array):
    """Transform the rows of ``x`` and average them per segment.

    ``x`` is reshaped to ``(-1, self.dim)``, optionally dropped out,
    passed through ``self.x2z`` and the configured activation, and
    optionally added back to ``x``.  The rows are split at
    ``index_array``; the last piece is discarded and every other piece
    is averaged over axis 0 (an empty piece becomes a zero vector).

    Returns:
        The stacked per-segment vectors.
    """
    x = x.reshape(-1, self.dim)
    if self.dropout_rate != 0:
        x = F.dropout(x, ratio=self.dropout_rate)

    z = self.x2z(x)
    if self.activate == 'tanh':
        z = F.tanh(z)
    elif self.activate == 'relu':
        z = F.relu(z)
    if self.is_residual:
        z = z + x

    segments = F.split_axis(z, index_array, axis=0)[:-1]
    pooled = [
        F.average(segment, axis=0) if len(segment) > 0
        else Variable(np.zeros(self.dim, dtype=np.float32))
        for segment in segments
    ]
    return F.stack(pooled)
def embedding_layer(self, chars, words, train):
    """Build word representations, optionally with character encodings.

    Without ``chars`` the given ``words`` are only concatenated along
    axis 0.  Otherwise the characters are embedded, encoded per word
    with ``self.bi_char``, and the encoding at the last position of each
    word is concatenated to the word embedding along axis 1.  Dropout is
    applied when ``train`` is true.
    """
    if chars is None:
        words = F.concat(words, axis=0)
    else:
        chars_len = xp.array([x.shape[0] for x in chars]).astype('i')
        chars_section = xp.cumsum(chars_len[:-1])

        char_embs = self.char_embed(F.concat(chars, axis=0))
        per_word = F.split_axis(char_embs, chars_section, axis=0)
        _, __, chars_encs = self.bi_char(None, None, per_word)

        # Pick the encoding at the last character of every word.
        last_positions = xp.cumsum(chars_len) - 1
        chars = F.get_item(F.vstack(chars_encs), last_positions)

        words = self.word_embed(F.concat(words, axis=0))
        words = F.concat((words, chars), axis=1)

    if train:
        words = F.dropout(words, self.dropout_ratio)
    return words
def __call__(self, x, margin_factor=1.0, train=True):
    """Embed a triplet batch and return its triplet loss.

    ``x`` is a batch of size 3n laid out as n anchors, followed by n
    positives, followed by n negatives.  It is split into these three
    equal parts along axis 0 and each part is embedded with
    ``self.embed``.  ``margin_factor`` is only applied when ``train`` is
    true; otherwise a factor of 1.0 is used.
    """
    anchors, positives, negatives = F.split_axis(x, 3, 0)
    anc = self.embed(anchors)
    pos = self.embed(positives)
    neg = self.embed(negatives)

    dist_pos, dist_neg = self.squared_distance(anc, pos, neg)

    if train:
        mf = margin_factor
    else:
        mf = 1.0
    return self.compute_loss(dist_pos, dist_neg, mf)
def advance_state(self, previous_states, prev_y):
    """Advance the decoder states by one step.

    When the current mini-batch is smaller than the stored states, every
    state is truncated to its first ``current_mb_size`` rows.  The
    context is computed from the last state (and ``prev_y`` when
    ``use_goto_attention`` is set), concatenated to ``prev_y`` and fed
    to the GRU.

    Returns:
        tuple: ``(new_states, concatenated, attn)``.
    """
    current_mb_size = prev_y.data.shape[0]
    assert self.mb_size is None or current_mb_size <= self.mb_size

    if current_mb_size < len(previous_states[0].data):
        # Keep only the leading rows of every state.
        previous_states = tuple(
            F.split_axis(state, (current_mb_size, ), 0)[0]
            for state in previous_states)

    output_state = previous_states[-1]
    if self.decoder_chain.use_goto_attention:
        ci, attn = self.compute_ctxt(output_state, prev_y)
    else:
        ci, attn = self.compute_ctxt(output_state)

    concatenated = F.concat((prev_y, ci))
    new_states = self.decoder_chain.gru(previous_states, concatenated)
    return new_states, concatenated, attn
def update_core(self):
    """Run one training iteration on the next batch.

    A noise batch is generated and passed through the generator.  The
    generated and true images are concatenated and passed through the
    discriminator, whose output is split back into the two halves.  One
    combined loss is back-propagated and every optimizer is updated.

    NOTE(review): the noise is drawn from a normal distribution when
    ``self.device == 0`` and from a uniform distribution otherwise.
    """
    batch = self._iterators['main'].next()
    true_x = self.converter(batch, self.device)

    # Create the random input z.
    batchsize = true_x.shape[0]
    noise_shape = (batchsize, self.z_dim, 1, 1)
    if self.device == 0:
        z = cuda.cupy.random.normal(size=noise_shape, dtype=np.float32)
    else:
        z = np.random.uniform(-1, 1, noise_shape).astype(dtype=np.float32)
    z = Variable(z)

    # G        -> x1 -> y of gen
    #                +  -> X -> D -> split
    # Truedata -> x2 -> y of true data
    gen_output = self.gen(z)
    x = F.concat((gen_output, true_x), 0)
    dis_output = self.dis(x)
    y_gen, y_data = F.split_axis(dis_output, 2, 0)

    # Train D to judge G's output as 1 (wrong) and the true data as 0
    # (correct).
    # sigmoid_cross_entropy(x, 0) == softplus(x)
    # sigmoid_cross_entropy(x, 1) == softplus(-x)
    loss_gen = F.sum(F.softplus(-y_gen))
    loss_data = F.sum(F.softplus(y_data))
    loss = (loss_gen + loss_data) / batchsize

    optimizers = self._optimizers.values()
    for optimizer in optimizers:
        optimizer.target.cleargrads()

    loss.backward()

    for optimizer in optimizers:
        optimizer.update()

    reporter.report({'loss':loss, 'gen/loss':loss_gen / batchsize, 'dis/loss':loss_data / batchsize})

    save_image(gen_output, self.epoch, self.device)
def __call__(self, batch_size, in_size=None):
    """Sample noisy weights and compute the associated loss.

    Args:
        batch_size (Variable): Size of the current batch; the summed
            loss is divided by it.
        in_size (int): Input size; only needed while the parameters are
            not initialized yet.

    Returns:
        tuple: ``(weight, bias, loss)``.  The bias is ``None`` when
        ``self.no_bias`` is set.

    Raises:
        ValueError: If the parameters are uninitialized and ``in_size``
            is not given.
    """
    if self.mu.data is None or self.sigma.data is None:
        if in_size is None:
            raise ValueError(
                "AdaptiveWeightNoise requires a in_size to intialize it's Parameters"
            )
        self._initialize_params(in_size)

    # Base parameters
    mean_mu = F.broadcast_to(F.mean(self.mu), self.mu.shape)
    sq_dev = F.square(self.mu - mean_mu)
    prior_var = F.broadcast_to(
        F.mean(F.exp(self.sigma) + sq_dev), self.sigma.shape)

    # Weight and bias, sampled around mu
    noise = variable.Variable(
        self.xp.random.randn(self.mu.size).astype(self.xp.float32))
    sampled = self.mu + noise * F.sqrt(F.exp(self.sigma))

    # Loss
    loss_x = (F.log(prior_var) - self.sigma) / 2.
    loss_y = (sq_dev + F.exp(self.sigma) - prior_var) / (
        (2. * prior_var) + 1e-8)
    loss = F.reshape(F.sum(loss_x + loss_y), (1, )) / batch_size

    # Extract the bias if required
    if self.no_bias:
        return F.reshape(sampled, (self.out_size, self.in_size)), None, loss

    n_weight_cols = self.in_size - 1
    w, b = F.split_axis(sampled, [self.out_size * n_weight_cols], axis=0)
    return F.reshape(w, (self.out_size, n_weight_cols)), b, loss
def train(self, positive, negative, links, relations, edges, xp):
    """Compute the training loss for positive and negative triples.

    The context vectors of all entities occurring in either list are
    computed once.  A triple ``(h, r, t)`` is scored with the squared L2
    norm of ``h - t + r``, where the relation embedding goes through
    ``tanh`` when ``self.is_bound_wr`` is set.

    Returns:
        The summed loss for the ``'relative'`` or ``'absolute'``
        objective; ``None`` for any other ``self.objective_function``.
    """
    self.cleargrads()

    entities = set()
    for triples in (positive, negative):
        for head, _, tail in triples:
            entities.add(head)
            entities.add(tail)
    entities = list(entities)

    contexts = self.get_context(entities, links, relations, edges, 0, xp)
    contexts = F.split_axis(contexts, len(entities), axis=0)
    edict = dict(zip(entities, contexts))

    def score(triples):
        # Squared distance of head - tail + relation for every triple.
        diffs, rels = [], []
        for head, rel, tail in triples:
            rels.append(rel)
            diffs.append(edict[head] - edict[tail])
        diffs = F.concat(diffs, axis=0)
        xr = self.embedR(xp.array(rels, 'i'))
        if self.is_bound_wr:
            xr = F.tanh(xr)
        return F.batch_l2_norm_squared(diffs + xr)

    pos = score(positive)
    neg = score(negative)

    if self.objective_function == 'relative':
        return sum(F.relu(self.threshold + pos - neg))
    if self.objective_function == 'absolute':
        return sum(pos + F.relu(self.threshold - neg))
def translate(self, xs, cur_max_index=1):
    """Decode one sequence, emitting the ``cur_max_index``-th best token.

    Only a single sequence (one video) may be passed per call.  The
    number of decoding steps equals ``len(xs)``.

    At every step the best-scoring token is picked ``cur_max_index``
    times; each picked token is masked before the next pick so the k-th
    pick is the k-th best token.  The scores are restored afterwards.
    The mask value is negative infinity, so a masked token can never be
    picked again regardless of how low the remaining scores are.

    :param xs: the single source sequence
    :param cur_max_index: rank (1 = best) of the token emitted per step
    :return: host array with the emitted token ids, one row per sequence
    """
    max_length = len(xs)
    xs = [xs]
    batch = len(xs)
    masked_score = float('-inf')

    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        xs = [x[::-1] for x in xs]
        exs = sequence_embed(self.embed_x, xs)
        h, c, _ = self.encoder(None, None, exs)
        ys = self.xp.full(batch, EOS, np.int32)
        result = []
        for _step in range(max_length):
            eys = self.embed_y(ys)
            eys = F.split_axis(eys, batch, 0)
            h, c, ys = self.decoder(h, c, eys)
            cys = F.concat(ys, axis=0)
            wy = self.W(cys)

            # Pick the best token repeatedly, masking each pick.
            picked = []
            for _rank in range(cur_max_index):
                ys = self.xp.argmax(wy.data, axis=1).astype(np.int32)
                saved = wy.data[0][ys]
                picked.append((ys, saved))
                wy.data[0][ys] = masked_score

            # Restore the masked scores.
            for token, saved in picked:
                wy.data[0][token] = saved

            result.append(ys)

    # Using `xp.concatenate(...)` instead of `xp.stack(result)` here to
    # support np 1.9.
    result = cuda.to_cpu(
        self.xp.concatenate(
            [self.xp.expand_dims(x, 0) for x in result]).T)
    return result
def get_scores(self, candidates, train_link, relations, aux_link, xp, mode):
    """Score candidate ``(h, r, t, l)`` tuples with the TransE function.

    The context vectors of all entities occurring as head or tail are
    computed once.  The score is ``|| h + r - t ||`` squared, with the
    relation embedding passed through ``tanh``.
    """
    entities = set()
    for head, _, tail, _ in candidates:
        entities.add(head)
        entities.add(tail)
    entities = list(entities)

    xe = self.get_context(entities, train_link, relations, aux_link, 0, xp)
    xe = F.split_axis(xe, len(entities), axis=0)
    edict = dict(zip(entities, xe))

    diffs, rels = [], []
    for head, rel, tail, _ in candidates:
        rels.append(rel)
        diffs.append(edict[head] - edict[tail])
    diffs = F.concat(diffs, axis=0)

    xr = F.tanh(self.embedR(xp.array(rels, 'i')))

    # TransE score function: f = || h + r - t ||
    return F.batch_l2_norm_squared(diffs + xr)
def _n_step_lstm_base(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction,
        recurrent_dropout_ratio=0.0, use_variational_dropout=False,
        **kwargs):
    """Run an n-step LSTM with optional recurrent dropout.

    Recurrent dropout is only active in training mode with a positive
    ``recurrent_dropout_ratio``.  With ``use_variational_dropout`` the
    call is delegated to ``_n_step_rnn_impl``.  Otherwise drop-connect is
    applied: the weights from index 4 onwards of every entry in ``ws``
    are concatenated, dropped out in one call and written back before
    the regular ``n_step_lstm_base`` is run.
    """
    recurrent_dropout = (
        chainer.configuration.config.train
        and recurrent_dropout_ratio > 0.0)

    if recurrent_dropout and use_variational_dropout:
        return _n_step_rnn_impl(
            F.rnn.n_step_lstm._lstm, n_layers, dropout_ratio, hx, cx, ws,
            bs, xs, use_bi_direction, recurrent_dropout_ratio,
            use_variational_dropout)

    if recurrent_dropout:
        # Drop connect
        ws = [list(w) for w in ws]
        r_ws = [r_w for w in ws for r_w in w[4:]]
        r_ws = F.split_axis(
            F.dropout(F.concat(r_ws), recurrent_dropout_ratio),
            len(r_ws), axis=1)
        for i, w in enumerate(ws):
            w[4:] = r_ws[4 * i: 4 * (i + 1)]

    return F.rnn.n_step_lstm.n_step_lstm_base(
        n_layers, dropout_ratio, hx, cx, ws, bs, xs, use_bi_direction,
        **kwargs)
def prediction(self, x):
    """Predict from attention-weighted ECFP and FCFP features.

    Two attention scores are computed from the concatenated features,
    normalized with a softmax and used to scale the ECFP and FCFP
    features before they are passed to ``self.dnn``.

    Returns:
        tuple: ``(pred, attention_ecfp, attention_fcfp)``.
    """
    x = Variable(x)
    ecfp = self.build_ecfp(x)
    fcfp = self.build_fcfp(x)

    h1 = self.attention_layer1(F.concat((ecfp, fcfp), axis=1))
    scores = F.concat(
        (F.rrelu(self.attention_layer2(h1)),
         F.rrelu(self.attention_layer3(h1))),
        axis=1)
    attention_ecfp, attention_fcfp = F.split_axis(F.softmax(scores), 2, 1)

    weighted = F.concat(
        (attention_ecfp * ecfp, attention_fcfp * fcfp), axis=1)
    pred = self.dnn(weighted)
    return pred, attention_ecfp, attention_fcfp
def __call__(self, input_tensor, cur_state, time):
    """Run one convolutional LSTM step.

    The input and the current hidden state are concatenated along the
    channel axis, convolved and normalized.  The result must have
    exactly ``4 * self.hidden_dim`` channels; it is split into the
    input, forget and output gates and the candidate.

    Returns:
        tuple: ``(h_next, c_next)``.
    """
    h_cur, c_cur = cur_state

    stacked = F.concat([input_tensor, h_cur], axis=1)
    gates = self.bn_ih(self.conv(stacked), time=time)

    n_gates, remainder = divmod(gates.shape[1], self.hidden_dim)
    assert remainder == 0
    assert n_gates == 4
    cc_i, cc_f, cc_o, cc_g = F.split_axis(gates, n_gates, axis=1)

    c_next = F.sigmoid(cc_f) * c_cur + F.sigmoid(cc_i) * F.tanh(cc_g)
    h_next = F.sigmoid(cc_o) * F.tanh(c_next)
    return h_next, c_next
def a():
    """Build a CTC loss on random inputs (the result is not returned).

    A random array is rearranged into one slice per time step and passed
    to ``ctc.connectionist_temporal_classification`` together with two
    fixed label rows, blank symbol 0 and explicit input/label lengths.
    """
    vocab_size = 6
    seq_length = 5
    batchsize = 2
    n_steps = seq_length * 2

    x = np.random.normal(0, 1, size=batchsize * vocab_size * n_steps)
    x = x.reshape((batchsize, vocab_size, 1, n_steps)).astype(np.float32)
    x = Variable(x)
    x = functions.swapaxes(x, 1, 3)
    x = functions.reshape(x, (batchsize, -1))
    x = functions.split_axis(x, n_steps, axis=1)

    t = Variable(np.asarray([
        [1, 2, 3, 4, 5],
        [1, 2, 3, 0, 0],
    ], dtype=np.int32))
    x_length = Variable(np.asarray([n_steps, seq_length]))
    t_length = Variable(np.asarray([5, 3]))

    loss = ctc.connectionist_temporal_classification(
        x, t, 0, x_length, t_length)
def encode_words(self, word_list):
    """Encode every word with the RNN and cache its row index.

    The words are concatenated, embedded (with dropout when
    ``self.inp_dropout`` is positive), split back per word and fed to
    ``self.rnn``.  The last output of every word is stacked into
    ``self.embedding``; ``self.cache`` maps each word (as a tuple) to
    its index in ``word_list``.
    """
    lengths = [len(word) for word in word_list]
    boundaries = np.cumsum(lengths[:-1])

    word_vars = [
        chainer.Variable(self.xp.array(word, dtype=self.xp.int32))
        for word in word_list
    ]
    embeddings = self.embed_layer(F.concat(word_vars, axis=0))
    if self.inp_dropout > 0.:
        embeddings = F.dropout(embeddings, ratio=self.inp_dropout)

    # Split back to one piece per word.
    per_word = F.split_axis(embeddings, boundaries, axis=0)
    _, _, hs = self.rnn(None, None, per_word)
    self.embedding = F.vstack([h[-1] for h in hs])

    for index, word in enumerate(word_list):
        self.cache[tuple(word)] = index
def __call__(self, vis=None, sos=None, word=None):
    """Run one step and return the sentinel ``s_t`` and the hidden state.

    When ``sos`` is given, the LSTM state is initialized from ``vis`` and
    ``sos``; otherwise ``word`` is fed to the LSTM.

    NOTE(review): the block structure was reconstructed; ``s_t`` is
    assumed to be computed after both branches, since both define ``g``
    and leave the cell state in ``self.af_LSTM.c`` -- confirm.
    """
    if sos is not None:
        # Initial step: the projection is split into four equal parts
        # along axis 1.
        h = self.vis2h(vis) + self.sos2h(sos)
        a, i, o, g = F.split_axis(h, 4, axis=1)
        a = F.tanh(a)
        i = F.sigmoid(i)
        o = F.sigmoid(o)
        g = F.sigmoid(g)
        c = a * i
        h = F.dropout(o * F.tanh(c), ratio=self.dropout_ratio)
        self.af_LSTM.set_state(c, h)
    else:
        word_emb = F.dropout(word, ratio=self.dropout_ratio)
        # The gate reads the hidden state *before* the LSTM is advanced.
        g = F.sigmoid(self.w2h(word_emb) + self.h2h(self.af_LSTM.h))
        h = F.dropout(self.af_LSTM(word_emb), ratio=self.dropout_ratio)
    # Gate applied to the current cell state.
    s_t = F.dropout(g * F.tanh(self.af_LSTM.c), ratio=self.dropout_ratio)
    return s_t, h
def __call__(self, xs, batchsize):
    """Classify documents with an attention-pooled LSTM.

    Every document is embedded with dropout and fed to ``self.lstm``.
    Attention scores are computed on the concatenated outputs, split per
    document, padded with ``-1024.0`` and normalized with a softmax over
    axis 1.  The padded outputs are weighted by the attention and summed
    per document before the final layer ``self.l3``.

    Returns:
        tuple: ``(y, attention)`` where ``attention`` is the softmax
        data of the first document.
    """
    doc_len = xs[0].shape[0]

    xs_embed = [F.dropout(self.embed(Variable(item)), ratio=0.5)
                for item in xs]
    hy, cy, ys = self.lstm(hx=None, cx=None, xs=xs_embed)
    ys = [F.dropout(item, ratio=0.25) for item in ys]

    # Attention computation
    scores = F.dropout(self.l_attn(F.concat(ys, axis=0)), ratio=0.25)
    boundaries = np.cumsum([len(item) for item in xs])[:-1]
    per_doc_scores = F.split_axis(scores, boundaries, axis=0)
    padded_scores = F.pad_sequence(per_doc_scores, padding=-1024.0)
    attn_softmax = F.softmax(padded_scores, axis=1)

    # Flatten outputs and weights to rows so they can be multiplied.
    ys_pad = F.pad_sequence(ys, length=None, padding=0.0)
    flat_ys = F.reshape(ys_pad, (-1, ys_pad.shape[-1]))
    flat_weights = F.broadcast_to(
        F.reshape(attn_softmax, (-1, attn_softmax.shape[-1])),
        flat_ys.shape)
    weighted = flat_ys * flat_weights

    per_doc = F.reshape(weighted, (batchsize, -1, weighted.shape[-1]))
    pooled = F.sum(per_doc, axis=1)

    y = self.l3(pooled)
    return y, attn_softmax[0].data
def forward(self, xs):
    """Run the CNN + LSTM + attention-pooling classifier.

    The examples are stacked (padded with -1), moved to channel-first
    layout and passed through two convolution blocks.  The feature maps
    are flattened per step, projected with ``self.cl`` and fed to the
    LSTM.  The LSTM outputs are pooled with a softmax attention over
    axis 0 and passed through the final layers.

    NOTE(review): the reshape to width 786 is hard-coded; presumably it
    matches the output size of ``self.cl`` -- confirm.
    """
    n_samples = len(xs)

    h = chainer.dataset.convert.concat_examples(xs, padding=-1)
    h = h.transpose(0, 3, 1, 2)

    h = self.af(self.b1(self.c1(h)))
    h = F.max_pooling_2d(h, ksize=(2, 4), stride=(2, 4))

    h = self.af(self.b2(self.c2(h)))
    h = F.max_pooling_2d(h, ksize=(1, 2), stride=(1, 2))

    # One row per (sample, step)
    h = h.transpose(0, 2, 1, 3)
    h = h.reshape(n_samples * h.shape[1], -1)
    h = self.cl(h)
    h = h.reshape(n_samples, -1, 786)

    last_h, last_c, ys = self.lstm(None, None, [sample for sample in h])

    # Attention scores, computed in one shot and split per sample.
    y_len = [len(y) for y in ys]
    y_section = np.cumsum(y_len[:-1])
    dropped = F.dropout(F.concat(ys, axis=0), ratio=self.dropout_rate)
    ay = self.a2(F.relu(self.a1(dropped)))
    ays = F.split_axis(ay, y_section, 0)

    pooled = []
    for y, scores in zip(ys, ays):
        weights = F.broadcast_to(F.softmax(scores, axis=0), y.shape)
        pooled.append(F.sum(y * weights, axis=0)[None, :])
    h = F.concat(pooled, axis=0)

    h = self.af(self.b3(self.l3(h)))
    return self.l4(h)
def predict(self, input_x):
    """Decode the raw predictor output into boxes, confidences and class scores.

    Args:
        input_x: 4-D input batch passed straight to ``self.predictor``.

    Returns:
        Tuple ``(box_x, box_y, box_w, box_h, conf, prob)``. Box centres and
        sizes are divided by the grid size; ``conf`` went through a sigmoid
        and ``prob`` through a softmax over the class axis.
    """
    output = self.predictor(input_x)
    # The unpacking doubles as a rank check: both tensors must be 4-D.
    batch_size, input_channel, input_h, input_w = input_x.shape
    batch_size, _, grid_h, grid_w = output.shape

    n_boxes = self.predictor.n_boxes
    per_box = F.reshape(
        output,
        (batch_size, n_boxes, self.predictor.n_classes + 5, grid_h, grid_w))
    x, y, w, h, conf, prob = F.split_axis(per_box, (1, 2, 3, 4, 5), axis=2)

    # Activations for x, y and the confidence.
    x = F.sigmoid(x)
    y = F.sigmoid(y)
    conf = F.sigmoid(conf)

    # Softmax over the classes: move the class axis to position 1 (the
    # default softmax axis), normalise, then move it back.
    prob = F.transpose(prob, (0, 2, 1, 3, 4))
    prob = F.softmax(prob)
    prob = F.transpose(prob, (0, 2, 1, 3, 4))

    # Offsets and anchor sizes used to convert x, y, w, h to absolute
    # coordinates. These are NumPy arrays (CPU) wrapped in Variables.
    col_index = np.arange(grid_w, dtype=np.float32)
    row_index = np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1)
    x_shift = Variable(np.broadcast_to(col_index, x.shape))
    y_shift = Variable(np.broadcast_to(row_index, y.shape))

    anchors = np.array(self.anchors, dtype=np.float32)
    anchor_shape = (n_boxes, 1, 1, 1)
    w_anchor = Variable(
        np.broadcast_to(np.reshape(anchors[:, 0], anchor_shape), w.shape))
    h_anchor = Variable(
        np.broadcast_to(np.reshape(anchors[:, 1], anchor_shape), h.shape))

    box_x = (x + x_shift) / grid_w
    box_y = (y + y_shift) / grid_h
    box_w = F.exp(w) * w_anchor / grid_w
    box_h = F.exp(h) * h_anchor / grid_h
    return box_x, box_y, box_w, box_h, conf, prob
def __call__(self, xs):
    """Encode ``xs`` with ``self.xh`` and run nine parallel output heads.

    Args:
        xs: list(Variable), passed to ``self.xh`` as the input sequences.

    Returns:
        The nine head outputs concatenated with ``F.concat`` (default axis),
        in the order self, qyn, qw, ayn, aw, res, fil, con, req.
    """
    hy, _, _ = self.xh(None, None, xs)

    # Split the final hidden state in two along axis 0, join the halves on
    # the feature axis, then fold the leading axis into the batch axis.
    halves = F.split_axis(hy, 2, axis=0)
    joined = F.concat(halves, axis=2)
    h = F.relu(F.concat(joined, axis=0))

    # Every head is the same two-layer stack: h_100_<name> -> ReLU ->
    # dropout -> h_1_<name>.
    head_names = ('self', 'qyn', 'qw', 'ayn', 'aw', 'res', 'fil', 'con', 'req')
    head_outputs = []
    for name in head_names:
        hidden_layer = getattr(self, 'h_100_' + name)
        hidden = F.dropout(F.relu(hidden_layer(h)))
        output_layer = getattr(self, 'h_1_' + name)
        head_outputs.append(output_layer(hidden))

    y = F.concat(tuple(head_outputs))
    return y
def __call__(self, X, split_into_variables=True, add_noise_to_input=True):
    """Embed token ids, run the (optionally densely connected) layer stack
    and project with ``self.dense``.

    Args:
        X: 2-D id array; axis 0 is read as the batch size and axis 1 as the
            sequence length.
        split_into_variables: If true, flatten each sample and split the
            result into ``seq_length`` equal sections along axis 1;
            otherwise return the single swapped-axes output.
        add_noise_to_input: If true, add standard-normal noise to the
            embeddings at the positions where ``X == BLANK``.

    Returns:
        Either the tuple produced by ``F.split_axis`` or a single output
        with axes 1 and 2 swapped, depending on ``split_into_variables``.
    """
    xp = self.xp
    batchsize = X.shape[0]
    seq_length = X.shape[1]

    embedding = self.embed(X)

    # Insert noise at <BLANK> (optional).
    if add_noise_to_input:
        noise = xp.random.normal(0, 1, embedding.shape)
        mask = X == BLANK
        mask = xp.broadcast_to(xp.expand_dims(mask, 2), noise.shape)
        embedding += noise * mask

    embedding = F.swapaxes(embedding, 1, 2)

    # ``in_data`` collects every layer output. The embedding itself only
    # joins the running sum when its width matches the hidden width.
    in_data = []
    if self.ndim_embedding == self.ndim_h:
        in_data.append(embedding)

    out_data = self._forward_layer(0, embedding)
    in_data.append(out_data)

    # ``range`` instead of the Python-2-only ``xrange`` keeps this loop
    # working on both Python 2 and Python 3.
    for layer_index in range(1, self.num_layers):
        # Dense connectivity: feed the sum of all previous outputs.
        layer_input = sum(in_data) if self.densely_connected else in_data[-1]
        out_data = self._forward_layer(layer_index, layer_input)
        in_data.append(out_data)

    if self.densely_connected:
        out_data = sum(in_data)

    if self.dropout:
        out_data = F.dropout(out_data, ratio=self.dropout)

    out_data = self.dense(out_data)
    out_data = F.swapaxes(out_data, 1, 2)

    if split_into_variables:
        out_data = F.reshape(out_data, (batchsize, -1))
        out_data = F.split_axis(out_data, seq_length, axis=1)

    return out_data
def __call__(self, site_feat, nbr_feat, nbr_feat_idx):
    """Update site features from gated messages over each site's neighbours.

    Args:
        site_feat: 2-D ``(n_site, n_site_feat)`` site features.
        nbr_feat: 3-D ``(n_site, n_nbr, n_nbr_feat)`` neighbour features.
        nbr_feat_idx: Index used to gather neighbour site features from
            ``site_feat``; assumed to have shape ``(n_site, n_nbr)`` so the
            gathered tensor lines up with ``nbr_feat`` -- TODO confirm.

    Returns:
        ``softplus(site_feat + bn2(sum over neighbours of gate * core))``.
    """
    n_site, n_nbr, n_nbr_feat = nbr_feat.shape
    _, n_site_feat = site_feat.shape

    # For every (site, neighbour) pair: [own features, neighbour's site
    # features, neighbour/bond features].
    neighbour_site_feat = site_feat[nbr_feat_idx]
    own_feat = functions.broadcast_to(
        site_feat[:, None, :], (n_site, n_nbr, n_site_feat))
    pair_feat = functions.concat(
        [own_feat, neighbour_site_feat, nbr_feat], axis=2)

    # The linear layer and first batch norm work on flattened pairs.
    flat_width = 2 * n_site_feat + n_nbr_feat
    pair_feat = self.fc(pair_feat.reshape(n_site * n_nbr, flat_width))
    pair_feat = self.bn1(pair_feat).reshape(n_site, n_nbr, 2 * n_site_feat)

    # First half of the features gates the second half.
    gate, core = functions.split_axis(pair_feat, 2, axis=-1)
    gate = functions.sigmoid(gate)
    core = functions.softplus(core)

    # Aggregate over neighbours, normalise and add the residual connection.
    aggregated = self.bn2(functions.sum(gate * core, axis=1))
    return functions.softplus(site_feat + aggregated)
def forward_nstep_lstm(self, es):
    """Apply the conv stack to every sequence, then layer-normalise the batch.

    Args:
        es: List of equally shaped 2-D sequences, documented by the original
            author as ``(sentence_length, in_channels)``. They are stacked,
            so all sequences must share one shape.

    Returns:
        List with one 2-D Variable per input sequence.
    """
    out_es = []
    es = F.stack(es)  # B, T, D
    es = F.transpose(es, axes=(0, 2, 1))  # B, D, T
    for e in es:
        # NOTE(review): this transpose undoes the batch-level one above, so
        # each sample reaches the conv layers in its original (T, D) layout
        # as (1, T, D). Confirm this is the layout the layers in
        # ``self.layer_name`` expect; behaviour is deliberately unchanged.
        e = F.transpose(e, axes=(1, 0))
        e = F.expand_dims(e, axis=0)
        for conv_layer in self.layer_name:
            e = self.act(getattr(self, conv_layer)(e))
        # Swap the last two axes back and drop the dummy batch axis.
        e = F.transpose(e, axes=(0, 2, 1))[0]
        e = F.dropout(e, self.dropout_ratio)
        out_es.append(e)

    out_es = F.stack(out_es)
    normalized = self.layer_norm(out_es)
    per_sample = F.split_axis(
        normalized, out_es.shape[0], axis=0, force_tuple=True)
    # Squeeze only the leading split axis. A bare ``F.squeeze`` would also
    # remove the time or feature axis whenever it happens to have length 1.
    return [F.squeeze(e, axis=0) for e in per_sample]
def __call__(self, prev_hg, prev_cg, prev_z, v, r, u):
    """Advance the LSTM core by one step.

    Args:
        prev_hg: Previous hidden state.
        prev_cg: Previous cell state.
        prev_z: Previous ``z`` input, concatenated as-is.
        v: Input passed through ``self.broadcast_v`` before concatenation.
        r: Input passed through ``self.broadcast_r`` when its axis 2 has
            size 1, otherwise used unchanged.
        u: Input passed through ``self.downsample_u`` before concatenation.

    Returns:
        Tuple ``(next_h, next_c)`` with the new hidden and cell states.
    """
    u = self.downsample_u(u)
    v = self.broadcast_v(v)
    if r.shape[2] == 1:
        r = self.broadcast_r(r)

    # One layer produces the pre-activations of all four gates at once.
    gate_inputs = self.lstm(cf.concat((prev_hg, v, r, prev_z, u), axis=1))

    if self.use_cuda_kernel:
        # Fused path: the custom function computes both states itself.
        next_h, next_c = CoreFunction()(gate_inputs, prev_cg)
        return next_h, next_c

    # Channel order of the four equal chunks: forget, input, candidate, output.
    forget_in, input_in, candidate_in, output_in = cf.split_axis(
        gate_inputs, 4, axis=1)
    next_c = (cf.sigmoid(forget_in) * prev_cg
              + cf.sigmoid(input_in) * cf.tanh(candidate_in))
    next_h = cf.sigmoid(output_in) * cf.tanh(next_c)
    return next_h, next_c
def forward(self, inputs):
    """Embed character ids and collect the activations of every layer.

    Parameters
    ----------
    inputs:
        Character ids for the current batch; the original documentation
        gives the shape as ``(batch_size, timesteps, 50)``.
        NOTE(review): the original docstring named ``torch.autograd.Variable``
        as the type, but the body uses ``F.concat`` / ``F.split_axis`` with
        ``axis=`` keywords -- confirm the actual array type.

    Returns
    -------
    Dict with keys:

    ``'activations'``: list
        One entry per layer. The first entry is the token embedding
        concatenated with itself along the last axis; the remaining entries
        are the slices of the LSTM output along axis 0. The original
        documentation gives each shape as
        ``(batch_size, timesteps + 2, embedding_dim)``.
    ``'mask'``:
        The sequence mask returned by the token embedder; the original
        documentation gives its shape as ``(batch_size, timesteps + 2)``.

    Note that, per the original documentation, the outputs include
    additional begin- and end-of-sequence markers.
    """
    embedded = self._token_embedder.forward(inputs)
    type_representation = embedded['token_embedding']
    mask = embedded['mask']

    lstm_outputs = self._elmo_lstm.forward(type_representation, mask)

    # Prepare the output. The first layer is duplicated.
    activations = [
        F.concat([type_representation, type_representation], axis=-1)
    ]
    # Cut the stacked LSTM output into one slice per layer and drop the
    # leading length-1 axis of every slice.
    per_layer = F.split_axis(lstm_outputs, lstm_outputs.shape[0], axis=0)
    activations.extend(F.squeeze(layer, 0) for layer in per_layer)

    return {'activations': activations, 'mask': mask}
def translate(self, xs, max_length=100):
    """Greedily decode a translation for every source sequence.

    Args:
        xs: List of source id sequences, each embedded with ``self.embed_src``.
        max_length: Number of decoding steps to run.

    Returns:
        List with one CPU id array per source sequence, cut just before the
        first ``EOS`` if one was generated.
    """
    xp = self.xp
    n_batch = len(xs)

    with chainer.no_backprop_mode(), chainer.using_config('train', False):
        exs = [self.embed_src(x) for x in xs]
        h, c, memory = self.encoder(None, None, exs)

        # Fold the encoder outputs by adding the first and second half of
        # the feature axis.
        half = memory[0].shape[1] // 2
        memory = [m.data[:, :half] + m.data[:, half:] for m in memory]

        # Reduce the encoder states to one per layer by stepping through
        # them in pairs.
        # NOTE(review): ``h`` sums both members of each pair while ``c``
        # keeps only the first -- confirm this asymmetry is intended.
        pair_starts = range(0, 2 * self.n_layers, 2)
        h = xp.dstack(
            [h.data[i] + h.data[i + 1] for i in pair_starts]
        ).transpose(2, 0, 1)
        c = xp.dstack([c.data[i] for i in pair_starts]).transpose(2, 0, 1)

        # Every sequence starts from BOS; each step feeds back the argmax.
        ys = xp.full(n_batch, BOS, 'i')
        result = []
        for _ in range(max_length):
            eys = F.split_axis(self.embed_dst(ys), n_batch, 0)
            h, c, ys = self.decoder_with_attn(h, c, eys, memory)
            wy = self.fc(F.concat(ys, axis=0))
            ys = xp.argmax(wy.data, axis=1).astype('i')
            result.append(ys)

    # (steps, batch) -> (batch, steps), on the CPU.
    result = cuda.to_cpu(xp.vstack(result)).T

    # Remove EOS and everything after it.
    outs = []
    for y in result:
        eos_positions = np.argwhere(y == EOS)
        if len(eos_positions) > 0:
            y = y[:eos_positions[0, 0]]
        outs.append(y)
    return outs