def __call__(self, x, s1, s2):
    """Run ``self.k`` rounds of value iteration and score the queried cells.

    Args:
        x: Input passed to ``self.conv1``.
        s1: Per-sample index along the last axis of the Q map.
        s2: Per-sample index along the second-to-last axis of the Q map;
            combined with ``s1`` as ``s2 * width + s1``.

    Returns:
        The output of ``self.l3`` applied to the Q values gathered at the
        queried positions. The same value is stored on ``self.ret``.
    """
    h = F.relu(self.conv1(x))
    self.r = self.conv2(h)
    q = self.conv3(self.r)
    self.v = F.max(q, axis=1, keepdims=True)

    # six.moves.range works on both Python 2 and 3; the bare ``xrange``
    # builtin used previously raises NameError on Python 3.
    for _ in six.moves.range(self.k - 1):
        q = self.conv3(self.r) + self.conv3b(self.v)
        self.v = F.max(q, axis=1, keepdims=True)
    q = self.conv3(self.r) + self.conv3b(self.v)

    # Flatten the two trailing axes and move the flattened axis in front of
    # axis 1 so each sample's queried cell can be picked with a one-hot mask.
    t = s2 * q.data.shape[3] + s1
    q = F.reshape(q, (q.data.shape[0], q.data.shape[1], -1))
    q = F.rollaxis(q, 2, 1)

    # The mask is built on the CPU and moved to the GPU only if q lives there.
    t_data_cpu = chainer.cuda.to_cpu(t.data)
    w = np.zeros(q.data.shape, dtype=np.float32)
    w[six.moves.range(t_data_cpu.size), t_data_cpu] = 1.0
    if isinstance(q.data, chainer.cuda.ndarray):
        w = chainer.cuda.to_gpu(w)
    w = chainer.Variable(w, volatile=not self.train)

    q_out = F.sum(w * q, axis=1)
    self.ret = self.l3(q_out)
    return self.ret
def predict(self, xs, softmax=False, argmax=False, get_embed=False,
            no_dropout=False):
    """Classify a (premise, hypothesis) batch.

    Args:
        xs: Pair ``(premise, hypothesis)`` of inputs for ``self.encoder``.
        softmax: If true, return the softmax of the outputs as a raw array.
        argmax: If true (and ``softmax`` is false), return the argmax over
            axis 1 as a raw array.
        get_embed: If true, also return the embeddings the encoder produced
            for both inputs.
        no_dropout: If true, dropout is applied with ratio 0.

    Returns:
        ``ys``, or ``(ys, exs0, exs1)`` when ``get_embed`` is true.
    """
    premise, hypothesis = xs
    if get_embed:
        enc_p, emb_p = self.encoder(premise, get_embed=True)
        enc_h, emb_h = self.encoder(hypothesis, get_embed=True)
    else:
        enc_p = self.encoder(premise, get_embed=False)
        enc_h = self.encoder(hypothesis, get_embed=False)

    # Max-pool every encoded sequence over its first axis, then batch them.
    pooled_p = F.stack([F.max(seq, axis=0) for seq in enc_p], axis=0)
    pooled_h = F.stack([F.max(seq, axis=0) for seq in enc_h], axis=0)

    drop_ratio = self.dropout if not no_dropout else 0.0
    feat_p = F.dropout(pooled_p, ratio=drop_ratio)
    feat_h = F.dropout(pooled_h, ratio=drop_ratio)

    # Matching features: both vectors, their absolute difference and product.
    matched = F.concat(
        [feat_p, feat_h, F.absolute(feat_p - feat_h), feat_p * feat_h],
        axis=1)
    ys = self.output(matched, no_dropout)

    if softmax:
        ys = F.softmax(ys).data
    elif argmax:
        ys = self.xp.argmax(ys.data, axis=1)

    if not get_embed:
        return ys
    return ys, emb_p, emb_h
def __call__(self, exs):
    """Apply the three convolutions, max-pool over axis 2, concat and ReLU.

    Args:
        exs: Input passed unchanged to ``cnn_w3``, ``cnn_w4`` and ``cnn_w5``.

    Returns:
        ReLU of the pooled feature maps concatenated along axis 1.
    """
    pooled = []
    for conv in (self.cnn_w3, self.cnn_w4, self.cnn_w5):
        pooled.append(F.max(conv(exs), axis=2))
    return F.relu(F.concat(pooled, axis=1))
def compute_shifts(cell, pbc, cutoff):
    """Enumerate integer cell shifts to consider for a given cutoff.

    Args:
        cell: Batched cell matrices; must expose ``.xp`` and be accepted by
            ``F.batch_inv``.
        pbc: Boolean condition for ``F.where``; where it is false the repeat
            count is forced to zero.
        cutoff: Cutoff distance multiplied with the inverse cell lengths.

    Returns:
        Raw array stacking the zero shift followed by 13 sign combinations
        of the per-axis repeat ranges, as produced by ``cartesian_prod``.
    """
    xp = cell.xp

    inv_cell = F.batch_inv(cell)
    inv_lengths = F.sqrt(F.sum(inv_cell**2, axis=1))
    inv_distances = F.max(inv_lengths, axis=0)

    repeats = F.ceil(cutoff * inv_distances)
    repeats = F.where(pbc, repeats, xp.zeros_like(repeats.data))
    repeats = F.max(repeats, axis=0)

    r1 = xp.arange(1, repeats.data[0] + 1)
    r2 = xp.arange(1, repeats.data[1] + 1)
    r3 = xp.arange(1, repeats.data[2] + 1)
    o = xp.zeros(1, dtype=r1.dtype)

    # Argument triples for cartesian_prod, in the order they are stacked.
    combos = [
        (r1, r2, r3), (r1, r2, o), (r1, r2, -r3),
        (r1, o, r3), (r1, o, o), (r1, o, -r3),
        (r1, -r2, r3), (r1, -r2, o), (r1, -r2, -r3),
        (o, r2, r3), (o, r2, o), (o, r2, -r3),
        (o, o, r3),
    ]
    stacked = [xp.array([[0.0, 0.0, 0.0]])]
    for first, second, third in combos:
        stacked.append(cartesian_prod(first, second, third))
    return F.vstack(stacked).data
def __call__(self, xs):
    """Encode a batch with the three CNNs and classify it with the MLP.

    Args:
        xs: Batch of samples accepted by ``concat_examples``.

    Returns:
        The output of ``self.mlp`` for the batch.
    """
    self.logger.debug("The length of the batch is {}".format(len(xs)))

    # Concatenate the samples in the batch so they are all the same size;
    # shorter sentences are padded with -1 to indicate "no word".
    x_block = chainer.dataset.convert.concat_examples(xs, padding=-1)
    self.logger.debug("The shape of the concatenated batch is {}".format(
        x_block.shape))
    self.logger.debug(
        "The block shape [0] of the concatenated set is {}".format(
            x_block[0].shape))

    ex_block = block_embed(self.embed, x_block, self.dropout)
    # Log the embedded block's shape; this line previously reported
    # x_block.shape, i.e. the shape before embedding.
    self.logger.debug(
        "The embedded block shape of the concatenated set is {}".format(
            ex_block.shape))
    self.logger.debug("The first embedded data shape is {}".format(
        ex_block[0].shape))

    h_w3 = F.max(self.cnn_w3(ex_block), axis=2)
    self.logger.debug("The first h_w3[0] data shape is {}".format(
        h_w3[0].shape))
    self.logger.debug("The first h_w3 data shape is {}".format(h_w3.shape))
    h_w4 = F.max(self.cnn_w4(ex_block), axis=2)
    h_w5 = F.max(self.cnn_w5(ex_block), axis=2)

    h = F.concat([h_w3, h_w4, h_w5], axis=1)
    h = F.relu(h)
    h = F.dropout(h, ratio=self.dropout)
    h = self.mlp(h)
    return h
def _do_one(inputs):
    """Compute the Q-learning loss and bookkeeping for one time step.

    ``inputs`` unpacks to ``(t, qvs, nqvs, result, mask, hopeful,
    terminate)``. ``self``, ``length`` and ``qvalues`` come from the
    enclosing scope.

    Returns:
        ``(loss, r, r_hope, action, correct, rush, late, terminate)``. When
        the sequence already terminated or ``mask`` is not 1, the loss is
        ``None``, every counter is 0 and ``terminate`` is ``True``.
    """
    t, qvs, nqvs, result, mask, hopeful, terminate = inputs
    if terminate or mask != 1:
        return None, 0, 0, 0, 0, 0, 0, True

    action = int(F.argmax(qvs).data)
    stop = action == 1
    at_last_step = t == length - 1

    # Training reward: depends on whether we stopped and whether that was right.
    reward = 0
    if stop:
        terminate = True
        if result == 1:
            reward = self.r_correct
        elif hopeful:
            reward = self.r_rush
        else:
            reward = self.r_wrong
    elif at_last_step and hopeful:
        reward = self.r_late

    # NOTE(review): ``qvalues`` is not among the unpacked inputs; it must
    # come from the enclosing scope - confirm it is not meant to be ``qvs``.
    qv = F.max(qvalues)
    nqv = F.max(nqvs).data
    # Stopping is terminal, so only the "continue" case bootstraps from the
    # next step's value (discounted by 0.5).
    target = reward if stop else reward + 0.5 * nqv
    loss = F.square(target - qv)

    # Reporting reward, independent of the training reward above.
    r = (10 if result == 1 else -5) if stop else 0
    r_hope = r if hopeful else 0
    correct = int(action and result == 1)
    rush = 1 if (result != 1 and action and hopeful) else 0
    late = 1 if (at_last_step and not action and hopeful) else 0
    return loss, r, r_hope, action, correct, rush, late, terminate
def __call__(self, x, t, train=True, finetune=False):
    """Forward pass returning ``(softmax cross-entropy loss, accuracy)``.

    Args:
        x: Input batch for ``self.l1``.
        t: Target labels.
        train: Forwarded to every layer and to ``F.dropout``.
        finetune: Forwarded to every layer.

    The shape comments are the original author's, for a batch of 3.
    """
    h = self.l1(x, train, finetune)  # (3, 20, 26, 26)
    h = F.dropout(h, self.dr, train)
    h = self.l2(h, train, finetune)  # (3, 20, 24, 24)
    h = F.max_pooling_2d(
        h, ksize=2, stride=2, pad=0, cover_all=True,
        use_cudnn=True)  # (3, 20, 12, 12)

    # l3..l6 are each followed by dropout:
    # (3, 20, 10, 10) -> (3, 20, 8, 8) -> (3, 20, 6, 6) -> (3, 20, 4, 4)
    for layer in (self.l3, self.l4, self.l5, self.l6):
        h = layer(h, train, finetune)
        h = F.dropout(h, self.dr, train)

    h = self.top(h)  # (3, 10, 1, 1)
    # Collapse the two trailing axes: (3, 10, 1) then (3, 10).
    for _ in range(2):
        h = F.max(h, axis=-1, keepdims=False)

    loss = F.softmax_cross_entropy(h, t)
    acc = F.accuracy(h, t)
    return loss, acc
def calc(self, x):
    """Run the edge-convolution network on ``x``.

    Returns:
        ``(h, t1)``: the output of ``self.fc8`` and the second value
        returned by ``self.input_transform_net``.
    """
    k = self.k
    gpu = self.gpu

    # --- input transform ---
    edge_feature = ec.edge_conv(x, k, gpu)
    h, t1 = self.input_transform_net(edge_feature, x)

    # Four edge-conv stages; each stage's pooled output is kept for the
    # concatenation below.
    stage_outputs = []
    stages = (self.conv_block1, self.conv_block2,
              self.conv_block3, self.conv_block4)
    for conv_block in stages:
        h = ec.edge_conv(h, k, gpu)
        h = conv_block(h)
        h = F.max(h, axis=3, keepdims=True)
        stage_outputs.append(h)

    h = self.conv_block5(F.concat(tuple(stage_outputs)))
    h = F.max(h, axis=2, keepdims=True)

    h = self.fc_block6(h)
    h = self.fc_block7(h)
    return self.fc8(h), t1
def spartial_pyramid_pooling(self, x):
    """Max-pool ``x`` along axis 3 over progressively finer windows.

    Returns a list of variables, each with a leading axis of size 1 added
    by ``F.expand_dims``. The first entry pools over the whole of axis 3;
    each further level ``i`` in ``range(1, self.spp_level)`` appends its
    window maxima.

    NOTE(review): the loop nesting below was reconstructed from a source
    whose indentation was lost - confirm against the original file.
    """
    # Zero row used to pad levels that need more windows than axis 3 has
    # entries.
    padding = Variable(np.zeros((1, x.shape[2]), dtype=np.float32))
    # Level 0: one global max over axis 3.
    h = [F.expand_dims(F.flatten(F.max(x, axis=3)), axis=0)]
    length = x.shape[3]
    for i in range(1, self.spp_level):
        division = 2**i
        window_size = length // division
        if window_size > 0:
            # ``i`` full windows of ``window_size`` entries each ...
            for j in range(i):
                h.append(
                    F.expand_dims(F.flatten(
                        F.max(x[:, :, :, (window_size * j):(window_size * (j + 1))], axis=3)), axis=0))
            # ... followed by one window covering everything that remains.
            h.append(
                F.expand_dims(F.flatten(
                    F.max(x[:, :, :, (window_size * i):], axis=3)), axis=0))
        else:
            # Fewer entries than windows: emit each entry on its own, then
            # pad up to ``division`` entries.
            for j in range(length):
                h.append(F.expand_dims(F.flatten(x[:, :, :, j]), axis=0))
            extend = division - length
            for j in range(extend):
                h.append(padding)
    return (h)
def __call__(self, x, t, train=True, finetune=False):
    """Forward pass returning ``(softmax cross-entropy loss, accuracy)``.

    Args:
        x: Input batch for ``self.l1``.
        t: Target labels.
        train: Forwarded to every layer.
        finetune: Forwarded to every layer.

    Dropout between the layers is disabled here (it was commented out in
    the original code).
    """
    h = self.l1(x, train, finetune)
    h = self.l2(h, train, finetune)
    h = plane_group_spatial_max_pooling(
        h, ksize=2, stride=2, pad=0, cover_all=True, use_cudnn=True)

    for layer in (self.l3, self.l4, self.l5, self.l6):
        h = layer(h, train, finetune)
    h = self.top(h)

    # Reduce axis -3 first, then the two trailing axes.
    for reduce_axis in (-3, -1, -1):
        h = F.max(h, axis=reduce_axis, keepdims=False)

    loss = F.softmax_cross_entropy(h, t)
    acc = F.accuracy(h, t)
    return loss, acc
def _compute_target_q_value(self, batch):
    """Compute the target ``r + gamma * Q_next * non_terminal`` for a batch.

    ``Q_next`` is obtained by sampling ``self._num_action_samples`` actions
    per next state, perturbing them, mixing the ensemble minimum and
    maximum with weight ``self._lambda``, and taking the best sample per
    state. Everything runs in test mode with backprop disabled.

    Args:
        batch: 5-tuple whose last three items are reward, next state and
            non-terminal flag.

    Returns:
        Unchained variable of shape ``(self._batch_size, 1)``.
    """
    with chainer.using_config('train', False):
        with chainer.using_config('enable_backprop', False):
            (_, _, r, s_next, non_terminal) = batch
            r = F.reshape(r, shape=tuple(r.shape) + (1,))
            non_terminal = F.reshape(
                non_terminal, shape=tuple(non_terminal.shape) + (1,))

            n_samples = self._num_action_samples
            total = self._batch_size * n_samples

            # Repeat every next state once per action sample.
            s_next_rep = F.repeat(x=s_next, repeats=n_samples, axis=0)
            a_next_rep = self._vae._decode(s_next_rep)
            perturbed_action = self._target_perturbator(
                s_next_rep, a_next_rep)

            ensemble_values = [
                q_target(s_next_rep, perturbed_action)
                for q_target in self._target_q_ensembles
            ]
            q_values = F.stack(ensemble_values)
            assert q_values.shape == (self._num_q_ensembles, total, 1)

            q_low = F.min(q_values, axis=0)
            q_high = F.max(q_values, axis=0)
            weighted_q_minmax = (self._lambda * q_low
                                 + (1 - self._lambda) * q_high)
            assert weighted_q_minmax.shape == (total, 1)

            # Best sampled action per original state.
            per_state = F.reshape(weighted_q_minmax,
                                  shape=(self._batch_size, -1))
            next_q_value = F.max(per_state, axis=1, keepdims=True)
            assert next_q_value.shape == (self._batch_size, 1)

            target_q_value = r + self._gamma * next_q_value * non_terminal
            target_q_value.unchain()
            assert target_q_value.shape == (self._batch_size, 1)
    return target_q_value
def get_grad(self, xs):
    """Forward ``xs`` without dropout and expose the embedded block.

    Args:
        xs: Batch of samples accepted by ``concat_examples``.

    Returns:
        ``(output, ex_block)``: the MLP output (called with
        ``no_dropout=True``) and the embedded input block it was computed
        from.
    """
    x_block = chainer.dataset.convert.concat_examples(xs, padding=-1)
    ex_block = block_embed(self.embed, x_block, dropout=0.)

    pooled = [
        F.max(conv(ex_block), axis=2)
        for conv in (self.cnn_w3, self.cnn_w4, self.cnn_w5)
    ]
    features = F.relu(F.concat(pooled, axis=1))
    return self.mlp(features, no_dropout=True), ex_block
def __call__(self, xs):
    """Encode a batch with three max-pooled convolutions.

    Args:
        xs: Batch of samples accepted by ``concat_examples`` (padded with 0).

    Returns:
        Squeezed, dropout-regularised ReLU features concatenated along
        axis 1.
    """
    batch = chainer.dataset.convert.concat_examples(xs, padding=0)
    # Insert a singleton axis at position 1 for the convolutions.
    batch = batch[:, None, :, :]

    pooled = [
        F.max(conv(batch), axis=2)
        for conv in (self.cnn_w3, self.cnn_w4, self.cnn_w5)
    ]
    h = F.concat(pooled, axis=1)
    h = F.dropout(F.relu(h), ratio=self.dpout_enc)
    return F.squeeze(h)
def __call__(self, x, t, train=True, finetune=False):
    """Forward pass returning ``(softmax cross-entropy loss, accuracy)``.

    Every layer output is max-reduced over axis -3 before it is passed on.
    Dropout between layers is disabled (it was commented out in the
    original code).

    Args:
        x: Input batch for ``self.l1``.
        t: Target labels.
        train: Forwarded to the layers ``l1``..``l6``.
        finetune: Forwarded to the layers ``l1``..``l6``.
    """
    def layer_then_pool(layer, inp):
        # Apply one layer and collapse axis -3 of its output.
        return F.max(layer(inp, train, finetune), axis=-3, keepdims=False)

    h = layer_then_pool(self.l1, x)
    h = layer_then_pool(self.l2, h)
    h = F.max_pooling_2d(h, ksize=2, stride=2, pad=0)
    h = layer_then_pool(self.l3, h)
    h = layer_then_pool(self.l4, h)
    h = layer_then_pool(self.l5, h)
    h = layer_then_pool(self.l6, h)

    h = self.top(h)
    for reduce_axis in (-3, -1, -1):
        h = F.max(h, axis=reduce_axis, keepdims=False)

    loss = F.softmax_cross_entropy(h, t)
    acc = F.accuracy(h, t)
    return loss, acc
def __call__(self, xs):
    """Embed a batch, encode it with three CNNs and classify with the MLP.

    Args:
        xs: Batch of samples accepted by ``concat_examples``; shorter
            samples are padded with -1.

    Returns:
        The output of ``self.mlp``.
    """
    x_block = chainer.dataset.convert.concat_examples(xs, padding=-1)
    ex_block = block_embed(self.embed, x_block, self.dropout)

    pooled = []
    for conv in (self.cnn_w3, self.cnn_w4, self.cnn_w5):
        pooled.append(F.max(conv(ex_block), axis=2))

    features = F.relu(F.concat(pooled, axis=1))
    features = F.dropout(features, ratio=self.dropout)
    return self.mlp(features)
def forward(self, xs):
    """Embed a batch, encode it with three CNNs and classify with the MLP.

    Args:
        xs: Batch of samples accepted by ``concat_examples``; shorter
            samples are padded with -1.

    Returns:
        The output of ``self.mlp``.
    """
    padded = chainer.dataset.convert.concat_examples(xs, padding=-1)
    embedded = block_embed(self.embed, padded, self.dropout)

    convs = (self.cnn_w3, self.cnn_w4, self.cnn_w5)
    pooled = [F.max(conv(embedded), axis=2) for conv in convs]

    hidden = F.concat(pooled, axis=1)
    hidden = F.dropout(F.relu(hidden), ratio=self.dropout)
    return self.mlp(hidden)
def readout(a, mode='sum', axis=1):
    """Reduce ``a`` along ``axis`` using the requested readout mode.

    Args:
        a: Variable to reduce.
        mode: ``'sum'``, ``'max'`` or ``'summax'`` (sum and max
            concatenated along ``axis``).
        axis: Axis to reduce over.

    Raises:
        ValueError: If ``mode`` is not one of the supported values.
    """
    if mode == 'sum':
        return functions.sum(a, axis=axis)
    if mode == 'max':
        return functions.max(a, axis=axis)
    if mode == 'summax':
        summed = functions.sum(a, axis=axis)
        maxed = functions.max(a, axis=axis)
        return functions.concat((summed, maxed), axis=axis)
    raise ValueError('mode {} is not supported'.format(mode))
def __call__(self, x, s1, s2):
    """Run the value-iteration recurrence, updating ``self.r`` and ``self.v``.

    Args:
        x: Input passed to ``self.conv``.
        s1: Unused in the code visible here.
        s2: Unused in the code visible here.

    NOTE(review): nothing is returned and ``s1``/``s2`` are never read, so
    the method probably continues beyond this excerpt - confirm.
    """
    h = F.relu(self.conv(x))
    self.r = self.conv2(h)
    q = self.conv3(self.r)
    self.v = F.max(q, axis=1, keepdims=True)
    for i in xrange(self.k - 1):
        q = self.conv3(self.r) + self.conv3b(self.v)
        # Keyword was misspelled ``asix``, which made F.max raise TypeError
        # on the first iteration.
        self.v = F.max(q, axis=1, keepdims=True)
    q = self.conv3(self.r) + self.conv3b(self.v)
def categorical_kl(params0, params1):
    """Return the per-row KL divergence KL(p0 || p1) of two categoricals.

    Only the first element of each argument is used. Each is treated as a
    2-D array of unnormalised log-probabilities, with one distribution per
    row along axis 1.

    Args:
        params0: Sequence whose first item holds the logits of ``p0``.
        params1: Sequence whose first item holds the logits of ``p1``; must
            have the same shape as ``params0[0]``.

    Returns:
        Variable with one KL value per row.
    """
    params0 = params0[0]
    params1 = params1[0]
    assert params0.shape == params1.shape

    # Tile by the actual number of categories. This used to be hard-coded
    # to 4, which only worked for 4-way distributions.
    reps = (1, params0.shape[1])

    # Subtract the row maximum before exponentiating for numerical stability.
    a0 = params0 - F.tile(F.max(params0, axis=1, keepdims=True), reps)
    a1 = params1 - F.tile(F.max(params1, axis=1, keepdims=True), reps)
    ea0 = F.exp(a0)
    ea1 = F.exp(a1)
    z0 = F.tile(F.sum(ea0, axis=1, keepdims=True), reps)
    z1 = F.tile(F.sum(ea1, axis=1, keepdims=True), reps)
    p0 = ea0 / z0
    # log p0 - log p1 == (a0 - log z0) - (a1 - log z1)
    return F.sum(p0 * (a0 - F.log(z0) - a1 + F.log(z1)), axis=1)
def __call__(self, doc, word):
    """Score a (document, word) pair.

    Both inputs go through their own convolution, ReLU and a max over
    axis 2; the results are concatenated, squeezed, passed through ReLU
    and fed to ``self.l_final``.
    """
    doc_feat = F.max(F.relu(self.conv_doc(doc)), axis=2)
    word_feat = F.max(F.relu(self.conv_word(word)), axis=2)

    merged = F.squeeze(F.concat((doc_feat, word_feat)))
    return self.l_final(F.relu(merged))
def __call__(self, x, e=None):
    """Gate ``x`` first per channel, then per position.

    Args:
        x: Input feature map; axes 2 and 3 are reduced for the first gate
            and axis 1 for the second.
        e: Unused.

    Returns:
        ``x`` after both sigmoid gates have been applied.
    """
    # First gate: average- and max-pooled descriptors over axes (2, 3)
    # share the same squeeze/extend layers.
    avg_desc = F.average(x, axis=(2, 3))
    max_desc = F.max(x, axis=(2, 3))
    avg_gate = self.ext(F.relu(self.sqz(avg_desc)))
    max_gate = self.ext(F.relu(self.sqz(max_desc)))
    x = F.sigmoid(avg_gate + max_gate)[:, :, None, None] * x

    # Second gate: average and max over axis 1, convolved into one map.
    avg_map = F.average(x, axis=1)[:, None]
    max_map = F.max(x, axis=1)[:, None]
    gate = F.sigmoid(self.conv(F.concat([avg_map, max_map])))
    return gate * x
def __call__(self, xs, labels=None):
    """Embed a batch, encode it with three CNNs and classify with the MLP.

    Args:
        xs: Batch of samples accepted by ``concat_examples``.
        labels: Forwarded to ``embed_xs_with_prediction`` when
            ``self.use_predict_embed`` is set and Chainer is in train mode.

    Returns:
        The output of ``self.mlp``.
    """
    x_block = chainer.dataset.convert.concat_examples(xs, padding=-1)
    ex_block = block_embed(self.embed, x_block, self.dropout)
    # During training the plain embedding may be replaced by the
    # prediction-based one.
    if self.use_predict_embed and chainer.config.train:
        ex_block = self.embed.embed_xs_with_prediction(
            xs, labels=labels, batch='concat')

    pooled = [
        F.max(conv(ex_block), axis=2)
        for conv in (self.cnn_w3, self.cnn_w4, self.cnn_w5)
    ]
    hidden = F.relu(F.concat(pooled, axis=1))
    hidden = F.dropout(hidden, ratio=self.dropout)
    return self.mlp(hidden)
def _predict_depth_chainer_backend(self, bgr, depth_bgr=None):
    """Run the model on one image pair and post-process its outputs.

    Args:
        bgr: Single image; wrapped into a batch of one as float32.
        depth_bgr: Single depth image, wrapped the same way.

    Returns:
        ``(label_pred, proba_img, depth_pred)`` as CPU arrays for the single
        batch item. Pixels whose best class probability is below
        ``self.proba_threshold`` are labelled ``self.bg_label``; the depth
        is rescaled to ``[model.min_depth, model.max_depth]``.
    """
    bgr_data = np.array([bgr], dtype=np.float32)
    depth_bgr_data = np.array([depth_bgr], dtype=np.float32)
    if self.gpu != -1:
        bgr_data = cuda.to_gpu(bgr_data, device=self.gpu)
        depth_bgr_data = cuda.to_gpu(depth_bgr_data, device=self.gpu)

    # Chainer < 2 uses volatile variables for inference; later versions use
    # configuration contexts instead.
    legacy_api = LooseVersion(chainer.__version__) < LooseVersion('2.0.0')
    if legacy_api:
        bgr = chainer.Variable(bgr_data, volatile=True)
        depth_bgr = chainer.Variable(depth_bgr_data, volatile=True)
        self.model(bgr, depth_bgr)
    else:
        with chainer.using_config('train', False):
            with chainer.no_backprop_mode():
                bgr = chainer.Variable(bgr_data)
                depth_bgr = chainer.Variable(depth_bgr_data)
                self.model(bgr, depth_bgr)

    mask_score = self.model.mask_score
    proba_img = F.softmax(mask_score)
    label_pred = F.argmax(mask_score, axis=1)
    depth_pred = F.sigmoid(self.model.depth_score)
    # Move axis 1 to the end so the class maximum is a reduction over axis -1.
    proba_img = F.transpose(proba_img, (0, 2, 3, 1))
    max_proba_img = F.max(proba_img, axis=-1)

    # squeeze batch axis, gpu -> cpu
    proba_img = cuda.to_cpu(proba_img.data)[0]
    max_proba_img = cuda.to_cpu(max_proba_img.data)[0]
    label_pred = cuda.to_cpu(label_pred.data)[0]
    depth_pred = cuda.to_cpu(depth_pred.data)[0]

    # uncertain because the probability is low
    uncertain = max_proba_img < self.proba_threshold
    label_pred[uncertain] = self.bg_label

    # get depth image (rescaled in place from the sigmoid range)
    depth_pred = depth_pred[0, :, :]
    depth_pred *= (self.model.max_depth - self.model.min_depth)
    depth_pred += self.model.min_depth

    return label_pred, proba_img, depth_pred
def compute_vecs(self, word_ids, word_boundaries, phrase_num,
                 char_vecs=None):
    """Build one max-pooled convolutional embedding per phrase.

    Args:
        word_ids: Word ids, wrapped with ``my_variable``.
        word_boundaries: Split points for ``F.split_axis`` along the length
            axis; only the odd-numbered segments are kept.
        phrase_num: Number of phrases; first dimension of the result.
        char_vecs: Optional extra vectors concatenated to the word
            embeddings when ``self.word_level_flag`` is set.

    Returns:
        Variable of shape ``(phrase_num, self.hidden_dim)``.
    """
    ids = my_variable(word_ids, volatile=not self.train)
    word_embs = self.emb(ids)  # total_len x dim

    if self.word_level_flag and char_vecs is not None:
        word_embs = F.concat([word_embs, char_vecs], axis=1)
        dim = self.emb_dim + self.add_dim
    else:
        dim = self.emb_dim
    conv_input = F.reshape(word_embs, (1, 1, -1, dim))
    # 1 x 1 x total_len x dim

    # convolution
    conv_out = self.conv(conv_input)
    # 1 x dim x total_len x 1
    conv_flat = F.reshape(conv_out, (self.hidden_dim, -1))

    # max over each kept segment
    segments = F.split_axis(conv_flat, word_boundaries, axis=1)
    embs = []
    for idx, segment in enumerate(segments):
        if idx % 2 == 1:
            embs.append(F.max(segment, axis=1))

    return F.reshape(F.concat(embs, axis=0), (phrase_num, self.hidden_dim))
def update(Q, target_Q, opt, samples, gamma=0.99, target_type='double_dqn'):
    """Update a Q-function with given samples and a target Q-function.

    Args:
        Q: Q-function being trained; must expose ``.xp``.
        target_Q: Q-function used to compute the bootstrap targets.
        opt: Optimizer whose ``update()`` is called after backprop.
        samples: Sequence of ``(obs, action, reward, done, obs_next)``.
        gamma: Discount factor.
        target_type: ``'dqn'`` or ``'double_dqn'``.

    Raises:
        ValueError: If ``target_type`` is not supported.
    """
    dtype = chainer.get_dtype()
    xp = Q.xp

    def column(index, kind):
        # Gather one field of every sample into an array of the given dtype.
        return xp.asarray([sample[index] for sample in samples], dtype=kind)

    obs = column(0, dtype)
    action = column(1, np.int32)
    reward = column(2, dtype)
    done = column(3, dtype)
    obs_next = column(4, dtype)

    # Predicted values: Q(s,a)
    y = F.select_item(Q(obs), action)

    # Target values: r + gamma * max_b Q(s',b)
    with chainer.no_backprop_mode():
        if target_type == 'dqn':
            next_q = F.max(target_Q(obs_next), axis=1)
        elif target_type == 'double_dqn':
            # Pick the action with Q, evaluate it with target_Q.
            greedy_action = F.argmax(Q(obs_next), axis=1)
            next_q = F.select_item(target_Q(obs_next), greedy_action)
        else:
            raise ValueError('Unsupported target_type: {}'.format(target_type))
        target = reward + gamma * (1 - done) * next_q

    loss = mean_clipped_loss(y, target)
    Q.cleargrads()
    loss.backward()
    opt.update()
def _train_batch(self, j):
    """Run one Q-learning update on the pooled transitions indexed by ``j``.

    Args:
        j: Index (or index array) into the replay pools. ``j + 1`` is used
            for the next state.

    Returns:
        Mean of the predicted Q values of the taken actions, as computed on
        the CPU.
    """
    # States are rescaled from [0, 255] to [-1, 1].
    s_j = (Variable(self.xp.asarray(self.state_pool[j].astype(np.float32)))
           / 127.5) - 1
    s_j1 = (Variable(
        self.xp.asarray(self.state_pool[j + 1].astype(np.float32)))
        / 127.5) - 1

    # Bootstrap target from the target network; terminal transitions get no
    # bootstrap term.
    Qhat = self.target_q(s_j1, train=False)
    max_Q = cuda.to_cpu(F.max(Qhat, axis=1).data)
    y_j = Variable(
        self.xp.asarray(self.reward_pool[j]
                        + (1 - self.terminal_pool[j]) * self.gamma * max_Q))

    a_j = Variable(self.xp.asarray(self.action_pool[j]))
    qs = self.action_q(s_j)
    q_preds = F.select_item(qs, a_j)

    loss = F.mean_squared_error(y_j, q_preds)
    self.optimizer.zero_grads()
    loss.backward()
    loss.unchain_backward()
    self.optimizer.update()

    return np.mean(cuda.to_cpu(q_preds.data))
def check_backward(self, x_data, y_grad, axis=None, keepdims=False):
    """Gradient-check ``functions.max`` for the given axis and keepdims."""
    def reduce_max(x):
        return functions.max(x, axis, keepdims)

    gradient_check.check_backward(
        reduce_max, x_data, y_grad,
        dtype='d', **self.check_backward_options)
def check_forward(self, x_data, axis=None, keepdims=False):
    """Compare ``functions.max`` on ``x_data`` with ``self.x.max``.

    Checks dtype (float32), shape and values.
    """
    result = functions.max(
        chainer.Variable(x_data), axis=axis, keepdims=keepdims)
    self.assertEqual(result.data.dtype, numpy.float32)

    # The reference is computed from the fixture ``self.x``.
    expected = self.x.max(axis=axis, keepdims=keepdims)
    self.assertEqual(result.data.shape, expected.shape)
    testing.assert_allclose(expected, result.data)
def _predict(self, img_bgr, depth_bgr):
    """Run inference on one image pair.

    Args:
        img_bgr: Single image; wrapped into a batch of one as float32.
        depth_bgr: Single depth image, wrapped the same way.

    Returns:
        ``(pred_label, proba_img, pred_depth)`` as CPU arrays for the single
        batch item. Pixels whose best class probability is below
        ``self.proba_threshold`` are labelled ``self.bg_label``.
    """
    xp = self.xp
    img_bgr_batch = xp.array([img_bgr], dtype=xp.float32)
    depth_bgr_batch = xp.array([depth_bgr], dtype=xp.float32)
    if self.gpu >= 0:
        img_bgr_batch = cuda.to_gpu(img_bgr_batch, device=self.gpu)
        depth_bgr_batch = cuda.to_gpu(depth_bgr_batch, device=self.gpu)

    with chainer.using_config('train', False):
        with chainer.no_backprop_mode():
            img_bgr_variable = chainer.Variable(img_bgr_batch)
            depth_bgr_variable = chainer.Variable(depth_bgr_batch)
            # Do inference; results are read from the model's attributes.
            self.model(img_bgr_variable, depth_bgr_variable, None, None)

    # Get proba_img, pred_label, pred_depth
    score_label = self.model.score_label
    proba_img = F.transpose(F.softmax(score_label), (0, 2, 3, 1))
    max_proba_img = F.max(proba_img, axis=-1)
    pred_label = F.argmax(score_label, axis=1)
    pred_depth = self.model.depth_pred

    # Squeeze batch axis, gpu -> cpu
    proba_img = cuda.to_cpu(proba_img.data)[0]
    max_proba_img = cuda.to_cpu(max_proba_img.data)[0]
    pred_label = cuda.to_cpu(pred_label.data)[0]
    pred_depth = cuda.to_cpu(pred_depth.data)[0, 0]

    # Uncertain because the probability is low
    low_confidence = max_proba_img < self.proba_threshold
    pred_label[low_confidence] = self.bg_label

    return pred_label, proba_img, pred_depth
def _do_both(inputs):
    """Compute a loss on both Q outputs for one time step.

    ``inputs`` unpacks to ``(t, qvs, nqvs, result, mask, hopeful,
    terminate)``. ``self`` and ``length`` come from the enclosing scope.
    ``qvs[0]`` is trained towards the "continue" target and ``qvs[1]``
    towards the "stop" reward, regardless of the action actually chosen.

    Returns:
        ``(loss, r, r_hope, action, correct, rush, late, terminate)``. When
        the sequence already terminated or ``mask`` is not 1, the loss is
        ``None``, every counter is 0 and ``terminate`` is ``True``.
    """
    t, qvs, nqvs, result, mask, hopeful, terminate = inputs
    if terminate or mask != 1:
        return None, 0, 0, 0, 0, 0, 0, True

    # Choosing action 1 does not set ``terminate`` here (that code was
    # disabled in the original).
    action = int(F.argmax(qvs).data)
    at_last_step = t == length - 1

    # Reward for stopping now.
    if result == 1:
        stop_reward = self.r_correct
    elif hopeful:
        stop_reward = self.r_rush
    else:
        stop_reward = self.r_wrong
    # Reward for continuing: only penalised at the final step of a hopeful
    # sequence.
    wait_reward = self.r_late if (at_last_step and hopeful) else 0

    nqv = F.max(nqvs).data
    loss = F.square(wait_reward + 0.3 * nqv - qvs[0])
    loss += F.square(stop_reward - qvs[1])

    # Reporting values.
    r = (10 if result == 1 else -5) if action == 1 else 0
    r_hope = r if hopeful else 0
    correct = int(action and result == 1)
    rush = 1 if (result != 1 and action and hopeful) else 0
    late = 1 if (at_last_step and not action and hopeful) else 0
    return loss, r, r_hope, action, correct, rush, late, terminate
def update_model(self):
    """Sample a minibatch, take one optimisation step and update the target.

    Returns:
        The raw loss value of this step.
    """
    batch = self.memory.sample_minibatch(self.minibatch_size)
    s, action, reward, s_next, is_terminal = batch

    # compute Q targets (max_a' Q_hat(s_next, a'))
    next_q = self.target_network(s_next)
    best_next_q = F.max(next_q, axis=1, keepdims=True)
    y = (1-is_terminal)*self.gamma*best_next_q + reward

    # compute Q(s, action)
    q_all = self.model_network(s)
    q_taken = F.reshape(F.select_item(q_all, action),
                        (self.minibatch_size, 1))

    # Clipped loss: |error| where |error| > 1, error**2 elsewhere.
    error = y - q_taken
    abs_error = abs(error.data)
    loss_clipped = (abs(error) * (abs_error > 1)
                    + (error**2) * (abs_error <= 1))
    loss = F.sum(loss_clipped) / self.minibatch_size

    # perform model update; accumulated gradients are zeroed first
    self.model_network.zerograds()
    loss.backward()
    self.optimizer.update()

    # target network tracks the model (soft update with rate tau)
    param_pairs = zip(self.target_network.params(),
                      self.model_network.params())
    for dst, src in param_pairs:
        dst.data = self.tau * src.data + (1 - self.tau) * dst.data

    return loss.data
def compute_q_learning_loss(self, l_obs, l_act, l_rew, l_next_obs, l_done):
    """
    Compute the mean squared Q-learning error for a batch of transitions.

    :param l_obs: A chainer variable holding a list of observations. Should be of shape N * |S|.
    :param l_act: A chainer variable holding a list of actions. Should be of shape N.
    :param l_rew: A chainer variable holding a list of rewards. Should be of shape N.
    :param l_next_obs: A chainer variable holding a list of observations at the next time step. Should be of shape
    N * |S|.
    :param l_done: A chainer variable holding a list of binary values (indicating whether episode ended after this
    time step). Should be of shape N.
    :return: A chainer variable holding a scalar loss.
    """
    "*** YOUR CODE HERE ***"
    # Value of the greedy next action under the target Q-function.
    next_q_all = self._qt.forward(l_next_obs)
    next_q_best = F.max(next_q_all, -1)

    # Bootstrapped target; finished episodes contribute the reward only.
    target = l_rew + (1 - l_done) * (self._discount * next_q_best)

    # Current estimate for the actions that were actually taken.
    q_taken = F.select_item(self._q.forward(l_obs), l_act)

    return F.mean((target - q_taken) ** 2)
def train_batch(self):
    """Sample a random batch from the replay pools and run one Q update.

    Prints the mean predicted Q value, the loss and the per-action mean of
    the Q outputs.
    """
    j = np.random.permutation(
        min(self.frame, self.pool_size - self.train_term)
    )[:self.batch_size] % self.pool_size

    # States are rescaled from [0, 255] to [-1, 1].
    s_j = (Variable(self.xp.asarray(self.state_pool[j].astype(np.float32)))
           / 127.5) - 1
    s_j1 = (Variable(
        self.xp.asarray(self.state_pool[j + 1].astype(np.float32)))
        / 127.5) - 1

    # Bootstrap target from the target network; terminal transitions get no
    # bootstrap term.
    Qhat = self.target_q(s_j1, train=False)
    max_Q = cuda.to_cpu(F.max(Qhat, axis=1).data)
    y_j = Variable(
        self.xp.asarray(self.reward_pool[j]
                        + (1 - self.terminal_pool[j]) * self.gamma * max_Q))

    a_j = Variable(self.xp.asarray(self.action_pool[j]))
    qs = self.action_q(s_j)
    q_preds = F.select_item(qs, a_j)

    loss = F.mean_squared_error(y_j, q_preds)
    self.optimizer.zero_grads()
    loss.backward()
    loss.unchain_backward()
    self.optimizer.update()

    qp_cpu = qs.data
    # Single-argument print calls give the same output under the Python 2
    # print statement and the Python 3 print function.
    print("Q " + str(np.mean(q_preds.data)))
    print("loss " + str(loss.data))
    print(np.mean(qp_cpu, axis=0))
def compute_q_learning_loss(self, l_obs, l_act, l_rew, l_next_obs, l_done):
    """
    Compute the mean squared Q-learning error for a batch of transitions.

    :param l_obs: A chainer variable holding a list of observations. Should be of shape N * |S|.
    :param l_act: A chainer variable holding a list of actions. Should be of shape N.
    :param l_rew: A chainer variable holding a list of rewards. Should be of shape N.
    :param l_next_obs: A chainer variable holding a list of observations at the next time step. Should be of shape
    N * |S|.
    :param l_done: A chainer variable holding a list of binary values (indicating whether episode ended after this
    time step). Should be of shape N.
    :return: A chainer variable holding a scalar loss.
    """
    # Target y: reward plus discounted best next value from the target
    # Q-function, zeroed for finished episodes.
    next_values = F.max(self._qt.forward(l_next_obs), axis=1)  # (N,)
    y = l_rew + self._discount * next_values * (1 - l_done)  # (N,)

    # Current estimate for the actions actually taken.
    q_taken = F.select_item(self._q.forward(l_obs), l_act)

    loss = F.mean(F.square(q_taken - y))
    assert isinstance(loss, C.Variable)
    return loss
def check_forward(self, x_data, axis=None, keepdims=False):
    """Compare ``functions.max`` on ``x_data`` with ``self.x.max``.

    Checks dtype (float32), shape and values.
    """
    output = functions.max(
        chainer.Variable(x_data), axis=axis, keepdims=keepdims)
    self.assertEqual(output.data.dtype, numpy.float32)

    # The reference is computed from the fixture ``self.x``.
    reference = self.x.max(axis=axis, keepdims=keepdims)
    self.assertEqual(output.data.shape, reference.shape)
    gradient_check.assert_allclose(reference, output.data)
def check_forward(self, x_data):
    """Compare ``functions.max`` on ``x_data`` with ``self.y_expect``.

    Uses the ``self.axis`` / ``self.keepdims`` fixture values and checks
    dtype (float32), shape and values.
    """
    output = functions.max(
        chainer.Variable(x_data), axis=self.axis, keepdims=self.keepdims)
    self.assertEqual(output.data.dtype, numpy.float32)

    expected = self.y_expect
    self.assertEqual(output.data.shape, expected.shape)
    testing.assert_allclose(expected, output.data)
def normalize_linearly(self, h):
    """Normalize h linearly over dimensions in [0, 1].

    The minimum and maximum are taken along axis 1. A small epsilon in the
    denominator avoids division by zero when they coincide.
    """
    lowest = F.min(h, axis=1, keepdims=True)
    highest = F.max(h, axis=1, keepdims=True)
    span = highest - lowest + 1e-10
    return (h - lowest) / span
def __call__(self, x, axis=1):
    """Apply the optional activation, then reduce along ``axis``.

    Args:
        x: Input variable.
        axis: Axis to reduce over (also the concat axis for ``'summax'``).

    Raises:
        ValueError: If ``self.mode`` is not ``'sum'``, ``'max'`` or
            ``'summax'``.
    """
    h = x if self.activation is None else self.activation(x)

    mode = self.mode
    if mode == 'sum':
        return functions.sum(h, axis=axis)
    if mode == 'max':
        return functions.max(h, axis=axis)
    if mode == 'summax':
        summed = functions.sum(h, axis=axis)
        maxed = functions.max(h, axis=axis)
        return functions.concat((summed, maxed), axis=axis)
    raise ValueError('mode {} is not supported'.format(self.mode))
def get_normalized_vector(d, xp=None):
    """Scale ``d`` to (approximately) unit L2 norm per leading-axis item.

    Every axis except the first is reduced. ``d`` is first divided by its
    largest absolute value and then by its L2 norm; small epsilons guard
    both divisions.

    Args:
        d: Array (when ``xp`` is given) or Chainer variable to normalise.
        xp: Array module (e.g. numpy). When given, ``d`` is modified in
            place with that module; otherwise Chainer functions are used.

    Returns:
        The normalised ``d``.
    """
    reduce_axes = tuple(range(1, len(d.shape)))

    if xp is None:
        max_abs = 1e-12 + F.max(F.absolute(d), reduce_axes, keepdims=True)
        d /= F.broadcast_to(max_abs, d.shape)
        l2_norm = F.sqrt(1e-6 + F.sum(d ** 2, reduce_axes, keepdims=True))
        d /= F.broadcast_to(l2_norm, d.shape)
        return d

    d /= (1e-12 + xp.max(xp.abs(d), reduce_axes, keepdims=True))
    d /= xp.sqrt(1e-6 + xp.sum(d ** 2, reduce_axes, keepdims=True))
    return d
def check_backward(self, x_data, y_grad, axis=None, keepdims=False):
    """Compare the analytic gradient of ``functions.max`` with a numeric one."""
    x = chainer.Variable(x_data)
    y = functions.max(x, axis=axis, keepdims=keepdims)
    y.grad = y_grad
    y.backward()

    func = y.creator

    def forward_on_copy():
        # Re-run the forward pass of the function that produced ``y``.
        return func.forward((x.data.copy(),))

    gx, = gradient_check.numerical_grad(
        forward_on_copy, (x.data,), (y.grad,), eps=1e-5)
    gradient_check.assert_allclose(gx, x.grad, rtol=1e-3, atol=1e-3)
def calculate_score(self, h, pos, neg, pos_score=None, neg_score=None,
                    multipos=False):
    """Score ``h`` against positive and negative candidates.

    Args:
        h: Query vectors; used as-is (the projection through
            ``W_predict`` is disabled).
        pos: Positive candidates, or an iterable of them when ``multipos``.
        neg: Negative candidates.
        pos_score: Ignored; always recomputed.
        neg_score: Ignored; always recomputed.
        multipos: If true, score every positive and keep only the maximum
            (the other ones are not propagated).

    Returns:
        ``(pos_score, neg_score)``.
    """
    h_pro = h

    if not multipos:
        pos_score = F.batch_matmul(h_pro, pos, transa=True)
    else:
        per_positive = [
            F.batch_matmul(h_pro, candidate, transa=True)
            for candidate in pos
        ]
        pos_score = F.max(F.concat(per_positive, axis=1),
                          axis=1, keepdims=True)

    neg_score = F.batch_matmul(h_pro, neg, transa=True)
    return pos_score, neg_score
def predict(self, xs):
    """Encode ``xs``, discretise with Gumbel-softmax and decode.

    Also reports ``maxp``, the mean over the maxima of the relaxed codes
    along axis 2, through the Chainer reporter.

    Returns:
        ``(y_hat, exs)``: the decoder output and the second value returned
        by ``self._encode``.
    """
    # Encoding
    logits, exs = self._encode(xs)

    # Discretization
    codes = F.gumbel_softmax(logits, self.tau, axis=2)
    flat_codes = codes.reshape(-1, self.M * self.K)

    # Monitoring value only; computed without building a graph.
    with chainer.no_backprop_mode():
        maxp = F.mean(F.max(codes, axis=2))
    reporter.report({'maxp': maxp.data}, self)

    # Decoding
    return self._decode(flat_codes), exs
def solve(self, docD, train=True):
    """Read a document, update entity states and answer its queries.

    Args:
        docD: Mapping with the keys ``"entities"``, ``"sentences"``,
            ``"queries"`` and ``"answers"``.
        train: Forwarded to the encoders and used for ``volatile``.

    Returns:
        ``(accum_loss_doc, TorFs, subTorFs)``: the summed cross-entropy loss
        over all queries, the number of correct top predictions, and a
        second count taken after masking entities that occur in the query.

    NOTE(review): block nesting was reconstructed from a source whose
    indentation was lost. In particular, confirm whether the final
    ``subTorFs`` update belongs inside the ``if max_idx != answer_idx``
    block.
    """
    old2newD, e2sD = self.initialize_entities(docD["entities"], self.args.max_ent_id, train=train)
    # Per-entity history of dynamic representations, seeded with the
    # initial state.
    e2dLD = dict((e, [s]) for (e, s) in e2sD.items())
    sentences = self.reload_sentences(docD["sentences"], old2newD)
    for sent in sentences:
        # i2sD: token position -> entity state
        # e2iLD: entity -> positions at which it occurs
        i2sD = OrderedDict()
        e2iLD = defaultdict(list)
        for i, token in enumerate(sent):
            if token in e2sD:
                i2sD[i] = e2sD[token]
                e2iLD[token].append(i)
        if not i2sD:  # skip sentences without any entities
            continue
        e2iLD = OrderedDict(e2iLD)
        concat_h_L = self.encode_context(sent, i2sD, e2iLD, train=train)
        for e, concat_h in zip(e2iLD.keys(), concat_h_L):
            e2dLD[e].append(F.tanh(self.W_hd(concat_h)))
            # New state: element-wise max of the old state and the newest
            # representation.
            e2sD[e] = F.max(F.concat([e2sD[e], e2dLD[e][-1]], axis=0), axis=0, keepdims=True)
    EPS = sys.float_info.epsilon
    accum_loss_doc, TorFs, subTorFs = 0, 0, 0
    for query, answer in zip(docD["queries"], docD["answers"]):
        query = self.reload_sentence(query, old2newD)
        answer = old2newD[int(answer)]
        i2sD = dict([(i, e2sD[token]) for i, token in enumerate(query) if token in e2sD])
        u_Dq, q = self.encode_query(query, i2sD, train=train)
        eL, sL = zip(*list(e2sD.items()))
        pre_vL = [self.attention_history(e2dLD[e], q, train=train) for e in eL]
        v_eDq = self.W_dv(F.concat(pre_vL, axis=0))
        answer_idx = eL.index(answer)
        p = self.predict_answer(u_Dq, v_eDq, [True if token in query else False for token in eL], train=train) + EPS
        t = chainer.Variable(self.xp.array([answer_idx]).astype(np.int32), volatile=not train)
        accum_loss_doc += F.softmax_cross_entropy(p, t)
        p_data = p.data[0, :]
        max_idx = self.xp.argmax(p_data)
        TorFs += (max_idx == answer_idx)
        if max_idx != answer_idx:
            # Suppress entities that appear in the query, then look at the
            # best remaining candidate.
            for sub_ans in [k for k, e in enumerate(eL) if e in query]:
                p_data[sub_ans] = -10000000
        subTorFs += (self.xp.argmax(p_data) == answer_idx)
    return accum_loss_doc, TorFs, subTorFs
def ordinal_loss(y, mask):
    """Compute the masked ordinal loss of ``y``, averaged over the batch.

    Args:
        y: Variable with three axes ``(b, c, n)``.
        mask: Weights multiplied element-wise into the loss terms.

    Returns:
        Scalar variable: the masked sum divided by ``b``.
    """
    xp = cuda.get_array_module(y.data)
    volatile = y.volatile
    b, c, n = y.data.shape

    # Shift by the maximum over axis 1 before exponentiating.
    peak = F.broadcast_to(F.max(y, axis=1, keepdims=True), y.data.shape)
    y = y - peak
    sum_y = F.broadcast_to(F.expand_dims(F.sum(y, axis=1), 1), y.data.shape)

    def as_kernel(tri):
        # Triangular c x c matrix as a 1x1 convolution kernel.
        return Variable(xp.asarray(tri.reshape(c, c, 1, 1)),
                        volatile=volatile)

    lower = np.tri(c, dtype=np.float32)
    w1 = as_kernel(lower)
    w2 = as_kernel(lower.T)

    def conv_log_conv(inp, kernel):
        # Convolve, take the log, and convolve again with the same kernel.
        return F.convolution_2d(F.log(F.convolution_2d(inp, kernel)), kernel)

    h = F.exp(F.expand_dims(y, -1))
    h1 = conv_log_conv(h, w1)
    h2 = conv_log_conv(h, w2)

    h = F.reshape(h1 + h2, (b, c, n))
    return F.sum((h - sum_y - y) * mask) / b
def forward(self, x_data, y_data, train=True, gpu=-1):
    """Run the CNN classifier on a batch.

    Args:
        x_data: Input array.
        y_data: Label array.
        train: Passed to ``F.dropout``.
        gpu: When non-negative, both arrays are moved to the GPU first.

    Returns:
        ``(y, loss, accuracy)``: the raw outputs, the softmax cross-entropy
        loss and the accuracy against the labels.
    """
    if gpu >= 0:
        x_data = cuda.to_gpu(x_data)
        y_data = cuda.to_gpu(y_data)
    x = Variable(x_data)
    t = Variable(y_data)

    # tanh is applied here, but sigmoid might work better?
    pooled = F.max(F.tanh(self.conv1(x)), axis=3, keepdims=True)
    hidden = F.dropout(F.sigmoid(self.l2(pooled)), train=train,
                       ratio=self.drop_ratio)
    y = self.lo(hidden)

    loss = F.softmax_cross_entropy(y, t)
    accuracy = F.accuracy(y, t)
    return y, loss, accuracy

# class CNNModel(FunctionSet):
#     def __init__(self, n_vocab=1000, n_units=25, train=True, ratio=0.5, conv_width=3, unit_length=100):
#         super(ConvolutionEncoder, self).__init__(
#             n_vocab=n_vocab, n_emb=n_emb, train=train,ratio=ratio
#         )
#         self.conv_width = conv_width
#         self.vec_len = unit_length
#         self.n_filters = n_units
#         self.n_units = n_units
#         #print(n_units, n_emb)
#         self.add_link('conv1',L.Convolution2D(1, self.n_filters, (self.conv_width, self.unit_length), stride=(1, self.unit_length), use_cudnn=False))
#         #self.to_init.append(self.conv1)
#         #self.add_link('conv1',L.Convolution2D(1, self.n_filters, (self.conv_width, n_units), stride=(1, n_units), use_cudnn=False))
#     def forward(self, x_data, y_data, train=True, gpu=-1):
#         batchsize=10
#         e = F.dropout(self.embed(chainer.Variable(sequence)), train=train)
#         # The shape was (sequence length, batchsize, vectorlength), so
#         # swapaxes is used to fix it to (batchsize, sequence length, vectorlength)
#         e = F.swapaxes(e,0,1)
#         #print(e.data.shape)
#         #e = F.reshape(e, (batchsize, 1, len(sequence), self.vec_len))
#         e = F.reshape(e, (batchsize, 1, len(sequence), self.vec_len))
#         e = F.tanh(e)
#         c = self.conv1(e)
#         h = F.reshape(F.max(c, axis=2), (batchsize, self.n_filters))
#         return h
def compute_q_learning_loss(self, l_obs, l_act, l_rew, l_next_obs, l_done):
    """
    Compute the mean squared Q-learning error for a batch of transitions.

    :param l_obs: A chainer variable holding a list of observations. Should be of shape N * |S|.
    :param l_act: A chainer variable holding a list of actions. Should be of shape N.
    :param l_rew: A chainer variable holding a list of rewards. Should be of shape N.
    :param l_next_obs: A chainer variable holding a list of observations at the next time step. Should be of shape
    N * |S|.
    :param l_done: A chainer variable holding a list of binary values (indicating whether episode ended after this
    time step). Should be of shape N.
    :return: A chainer variable holding a scalar loss.
    """
    "*** YOUR CODE HERE ***"
    # Best next-state value under the target Q-function.
    best_next_q = F.max(self._qt.forward(l_next_obs), axis=1)
    # Bootstrapped target; finished episodes contribute the reward only.
    target = l_rew + (1 - l_done) * self._discount * best_next_q
    # Current estimate for the actions actually taken.
    q_taken = F.select_item(self._q.forward(l_obs), l_act)
    return F.mean_squared_error(target, q_taken)
def __call__(self, batch):
    """Score every token of a batch from word (and optionally char) features.

    ``batch`` unpacks as ``(word_ids, (char_ids, char_boundaries))``. Word
    ids are embedded; when ``self.use_char`` is set, character embeddings
    are convolved, max-reduced per segment and concatenated onto the word
    embeddings. The result is convolved by ``self.word_conv``, flattened to
    rows of width ``self.word_hidden_dim`` and passed through ``relu`` and
    ``self.linear``.

    Shape notes in the comments below are carried over from the original
    author's annotations.

    :param batch: ``(word_ids, (char_ids, char_boundaries))``.
    :return: Output of ``self.linear``, one row per flattened position.
    """
    word_ids, (char_ids, char_boundaries) = batch
    n_batch = word_ids.data.shape[0]

    # Word lookup table.
    features = self.word_emb(word_ids)  # batch x len x dim

    if self.use_char:
        # Character lookup table.
        char_vectors = self.char_emb(char_ids)  # total_len x dim
        conv_input = F.reshape(
            char_vectors, (1, 1, -1, self.char_emb_dim))  # 1 x 1 x total_len x dim

        # Convolution over the flattened character stream.
        conv_output = self.char_conv(conv_input)  # 1 x dim x total_len x 1
        conv_matrix = F.reshape(
            conv_output, (self.char_hidden_dim, -1))  # dim x total_len

        # Max over each segment; only odd-indexed segments are kept
        # (the original marks those as "not pad").
        segments = F.split_axis(conv_matrix, char_boundaries, axis=1)
        pooled = [
            F.max(segment, axis=1)
            for position, segment in enumerate(segments)
            if position % 2 == 1
        ]
        char_features = F.reshape(
            F.concat(pooled, axis=0), (n_batch, -1, self.char_hidden_dim))

        # Concatenate character features onto the word embeddings.
        features = F.concat([features, char_features], axis=2)  # batch x len x dim

    word_conv_input = F.reshape(features, (n_batch, 1, -1, self.word_dim))
    hidden = self.word_conv(word_conv_input)  # batch x dim x len x 1

    # TODO: maybe inefficient (F.swapaxes(hidden, 1, 2) was the earlier variant)
    hidden_swapped = F.transpose(hidden, (0, 2, 1, 3))
    hidden_rows = F.reshape(hidden_swapped, (-1, self.word_hidden_dim))

    return self.linear(F.relu(hidden_rows))
def check_backward(self, x_data, y_grad):
    """Numerically check the gradient of ``functions.max`` for this case.

    The reduction uses ``self.axis`` and ``self.keepdims``; the check is run
    with ``dtype='d'`` and any extra options from
    ``self.check_backward_options``.

    :param x_data: Input array passed to the gradient checker.
    :param y_grad: Upstream gradient passed to the gradient checker.
    """
    def reduce_max(x):
        return functions.max(x, self.axis, self.keepdims)

    gradient_check.check_backward(
        reduce_max,
        x_data,
        y_grad,
        dtype='d',
        **self.check_backward_options
    )
def f(x):
    """Return ``functions.max`` of ``x`` over ``self.axis`` with ``self.keepdims``.

    ``self`` is not a parameter; it is read from the enclosing scope.
    """
    reduced = functions.max(x, self.axis, self.keepdims)
    return reduced
def forward(self, inputs):
    """
    Compute context insensitive token embeddings for ELMo representations.

    Parameters
    ----------
    inputs:
        Character ids representing the current batch. The original
        documentation gives the shape as
        ``(batch_size, sequence_length, 50)``.

    Returns
    -------
    Dict with keys:
    ``'token_embedding'``:
        Context insensitive token representations, reshaped to
        ``(batch_size, sequence_length, -1)`` where both sizes are read
        from the ids *after* sentence-boundary tokens were added (the
        original documentation states ``sequence_length + 2``).
    ``'mask'``:
        Sequence mask returned by ``add_sentence_boundary_token_ids``.

    Raises
    ------
    ConfigurationError
        If the configured activation is neither ``'tanh'`` nor ``'relu'``.
    """
    # Add BOS/EOS. A position is unmasked when any of its character ids is > 0.
    mask = ((inputs > 0).sum(axis=-1) > 0)
    ids_with_boundaries, mask_with_bos_eos = add_sentence_boundary_token_ids(
        inputs,
        mask,
        self._beginning_of_sentence_characters,
        self._end_of_sentence_characters
    )

    cnn_options = self._options['char_cnn']

    # The character id embedding:
    # (batch_size * sequence_length, max_chars_per_token, embed_dim)
    max_chars_per_token = cnn_options['max_characters_per_token']
    flat_ids = ids_with_boundaries.reshape((-1, max_chars_per_token))
    char_embedding = F.embed_id(flat_ids, self._char_embedding_weights)

    # Pick the non-linearity applied after pooling.
    activation_name = cnn_options['activation']
    if activation_name == 'tanh':
        activation = F.tanh
    elif activation_name == 'relu':
        activation = F.relu
    else:
        raise ConfigurationError("Unknown activation")

    # (batch_size * sequence_length, embed_dim, max_chars_per_token), then a
    # trailing singleton axis is appended before the convolutions.
    char_embedding = F.transpose(char_embedding, (0, 2, 1))
    char_embedding = char_embedding[:, :, :, None]

    def pooled_features(index):
        # (batch_size * sequence_length, n_filters for this width)
        layer = getattr(self, 'char_conv_{}'.format(index))
        return activation(F.max(layer(char_embedding), axis=(2, 3)))

    per_width = [pooled_features(index)
                 for index in range(len(self._convolutions))]

    # (batch_size * sequence_length, n_filters)
    token_embedding = F.concat(per_width, axis=-1)

    # Apply the highway layers (batch_size * sequence_length, n_filters)
    token_embedding = self._highways.forward(token_embedding)

    # Final projection (batch_size * sequence_length, embedding_dim)
    token_embedding = self._projection(token_embedding)

    # Reshape to (batch_size, sequence_length, embedding_dim)
    batch_size, sequence_length, _ = ids_with_boundaries.shape
    reshaped = token_embedding.reshape((batch_size, sequence_length, -1))

    return {
        'mask': mask_with_bos_eos,
        'token_embedding': reshaped
    }
def test_invalid_axis_type_in_tuple(self):
    """A tuple axis containing a non-integer entry must raise ``TypeError``."""
    bad_axis = (1, 'x')
    with self.assertRaises(TypeError):
        functions.max(self.x, bad_axis)
def test_duplicate_axis(self):
    """Naming the same axis twice must raise ``ValueError``."""
    repeated_axis = (0, 0)
    with self.assertRaises(ValueError):
        functions.max(self.x, repeated_axis)
def f(x):
    """Return the square of ``functions.max(x, axis, keepdims)``.

    ``axis`` and ``keepdims`` are read from the enclosing scope.
    """
    reduced = functions.max(x, axis, keepdims)
    return reduced * reduced
def test_invalid_axis_type(self):
    """Passing the axis as a list must raise ``TypeError``."""
    list_axis = [0]
    with self.assertRaises(TypeError):
        functions.max(self.x, list_axis)
def train(self, par=None):
    """Run ``self.batches`` Q-network updates from the replay buffer.

    Steps, in order:

    1. Recompute ``self.pr`` from ``self.random_exp`` and ``self.idx``
       (capped at 1.0) and increment the update counter ``self.upd``.
    2. Every third update, copy the parameters of ``self.Q`` into
       ``self.Qp``.
    3. If ``self.r`` is non-empty, append it to ``self.r_buff`` and reset it.
    4. For each batch: sample up to ``self.batch`` games without
       replacement, evaluate ``self.Qp`` on all their states, build targets
       by walking each game in reverse, then take one optimizer step on
       ``self.loss``.

    :param par: Unused by this method.
    """
    self.pr = min(1.0, 0.02 + self.random_exp / (self.idx + 1.0))
    self.upd = self.upd + 1

    # Periodically refresh the network used to compute targets.
    if self.upd % 3 == 0:
        self.Qp.copyparams(self.Q)

    # Save the pending game into the buffer, if any.
    if self.r:
        self.r_buff.append(self.r)
        self.r = []

    # Process batches.
    for step in range(self.batches):
        buffer_len = len(self.r_buff)
        chosen = np.random.choice(
            buffer_len, min(buffer_len, self.batch), replace=False)

        # All states of the sampled games, each game walked last-to-first.
        target_states = []
        for game_index in chosen:
            for state, _action, _reward in reversed(self.r_buff[game_index]):
                target_states.append(state)

        target_input = tv(np.row_stack(target_states), v=flag.ON)
        best_q = F.max(self.Qp(target_input), axis=1).data

        states = []
        actions = []
        targets = []
        cursor = 0
        for game_index in chosen:
            # The first step visited (last in the game) gets no bootstrap.
            carried = 0.0
            for state, action, reward in reversed(self.r_buff[game_index]):
                states.append(state)
                targets.append(carried + reward)
                actions.append(action)
                # The step visited next uses this state's discounted best value.
                carried = self.d * best_q[cursor]
                cursor += 1

        batch_states = tv(np.row_stack(states))
        batch_targets = tv(np.squeeze(np.row_stack(targets)))

        self.Q.zerograds()
        loss = self.loss(batch_states, batch_targets, actions, self.Q)

        # Update the parameters of the agent.
        loss.backward()
        self.opt.update()
def test_pos_neg_duplicate_axis(self):
    """Axes ``1`` and ``-2`` name the same axis of a 3-D input and must raise ``ValueError``."""
    shape = (3, 2, 4)
    values = numpy.random.uniform(-1, 1, shape).astype(numpy.float32)
    variable = chainer.Variable(values)
    with self.assertRaises(ValueError):
        functions.max(variable, axis=(1, -2))
def f(x):
    """Return the square of ``functions.max(x, self.axis, self.keepdims)``.

    ``self`` is not a parameter; it is read from the enclosing scope.
    """
    reduced = functions.max(x, self.axis, self.keepdims)
    return reduced * reduced