def lstm_without_dropout(n_layer, dropout, hx, cx, ws, bs, xs):
    """Run a stacked LSTM by hand, rescaling activations instead of dropping.

    The eight per-layer weights/biases are regrouped into one input-side and
    one hidden-side parameter (gate order 2, 0, 1, 3 within each group of
    four) via ``_stack_weight``. Only the first three entries of ``xs`` are
    consumed. For every layer above the first, the layer input is multiplied
    by ``1 / (1 - dropout)`` instead of being passed through dropout.

    Args:
        n_layer: Number of stacked layers.
        dropout: Dropout ratio used only for the rescaling factor.
        hx: Per-layer initial hidden states, indexable by layer.
        cx: Per-layer initial cell states, indexable by layer.
        ws: Per-layer sequences of eight weights.
        bs: Per-layer sequences of eight biases.
        xs: Indexable inputs; entries 0, 1 and 2 are used.

    Returns:
        tuple: ``(hy, cy, ys)`` -- stacked final hidden states, stacked final
        cell states, and the list of top-layer outputs per step.
    """
    def _regroup(params, offset):
        # Gate order expected by ``functions.lstm``: 2, 0, 1, 3 of the group.
        picked = [params[offset + 2], params[offset],
                  params[offset + 1], params[offset + 3]]
        return _stack_weight(picked)

    in_weights = [_regroup(w, 0) for w in ws]
    rec_weights = [_regroup(w, 4) for w in ws]
    in_biases = [_regroup(b, 0) for b in bs]
    rec_biases = [_regroup(b, 4) for b in bs]

    steps = [xs[0], xs[1], xs[2]]
    cells, hiddens = cx, hx
    outputs = []
    for signal in steps:
        new_cells = []
        new_hiddens = []
        for depth in range(n_layer):
            if depth != 0:
                # Only multiply ratio
                signal = signal * (1 / (1.0 - dropout))
            from_input = functions.linear(
                signal, in_weights[depth], in_biases[depth])
            from_hidden = functions.linear(
                hiddens[depth], rec_weights[depth], rec_biases[depth])
            next_cell, next_hidden = functions.lstm(
                cells[depth], from_input + from_hidden)
            new_cells.append(next_cell)
            new_hiddens.append(next_hidden)
            signal = next_hidden
        cells, hiddens = new_cells, new_hiddens
        outputs.append(signal)

    cy = functions.stack(cells)
    hy = functions.stack(hiddens)
    return hy, cy, outputs
def beam_search(dec,state,y,data,beam_width,mydict_inv):
    """Decode up to 50 steps with beam search and return the best beam.

    Step 0 feeds the argmax of ``y`` to ``dec`` once and seeds ``beam_width``
    hypotheses from the highest log-softmax scores. Every later step runs
    ``dec`` once per beam, expands each beam with its ``topk`` (20) best
    continuations, adds the scores to the running totals, and keeps the
    ``beam_width`` best of the ``beam_width * topk`` candidates together with
    the matching ``h1``/``c1`` decoder states.

    Args:
        dec: Callable invoked as ``dec(token_ids, state, train=False)`` and
            returning ``(state, scores)``; the returned state is read through
            its ``'h1'`` and ``'c1'`` entries.
        state: Initial decoder state passed to the first ``dec`` call.
        y: Scores whose ``.data.get()`` is arg-maxed over axis 1 to obtain the
            first input token.
        data: Only ``data.shape[0]`` is read, as the batch size.
        beam_width: Number of hypotheses kept per batch item.
        mydict_inv: Only its length is read (into the unused ``vocab_size``).

    Returns:
        list: ``route[:, 0, :j + 1]`` as nested lists -- the token ids of the
        highest-scoring beam for each batch item, up to the last executed
        step.
    """
    beam_width=beam_width
    # NOTE(review): cupy is used unconditionally and ``.get()`` is called on
    # array data, so this presumably only runs on GPU -- confirm.
    xp=cuda.cupy
    batchsize=data.shape[0]
    vocab_size=len(mydict_inv)
    topk=20
    # route[b, k, t] is the token chosen at step t by beam k of batch item b.
    route = np.zeros((batchsize,beam_width,50)).astype(np.int32)
    for j in range(50):
        if j == 0:
            # First step: a single decoder call shared by all beams.
            y = Variable(xp.array(np.argmax(y.data.get(), axis=1)).astype(xp.int32))
            state,y = dec(y, state, train=False)
            h=state['h1'].data
            c=state['c1'].data
            # Replicate the decoder state once per beam.
            h=xp.tile(h.reshape(batchsize,1,-1), (1,beam_width,1))
            c=xp.tile(c.reshape(batchsize,1,-1), (1,beam_width,1))
            ptr=F.log_softmax(y).data.get()
            # Descending sort: keep the beam_width best tokens and their scores.
            pred_total_city = np.argsort(ptr)[:,::-1][:,:beam_width]
            pred_total_score = np.sort(ptr)[:,::-1][:,:beam_width]
            route[:,:,j] = pred_total_city
            pred_total_city=pred_total_city.reshape(batchsize,beam_width,1)
        else:
            pred_next_score=np.zeros((batchsize,beam_width,topk))
            pred_next_city=np.zeros((batchsize,beam_width,topk)).astype(np.int32)
            score2idx=np.zeros((batchsize,beam_width,topk)).astype(np.int32)
            for b in range(beam_width):
                # Advance beam b by one step from its own stored state.
                state={'c1':Variable(c[:,b,:]), 'h1':Variable(h[:,b,:])}
                cur_city = xp.array([pred_total_city[i,b,j-1] for i in range(batchsize)]).astype(xp.int32)
                state,y = dec(cur_city,state, train=False)
                h[:,b,:]=state['h1'].data
                c[:,b,:]=state['c1'].data
                ptr=F.log_softmax(y).data.get()
                pred_next_score[:,b,:]=np.sort(ptr, axis=1)[:,::-1][:,:topk]
                pred_next_city[:,b,:]=np.argsort(ptr, axis=1)[:,::-1][:,:topk]
            # Give every (beam, candidate) pair a copy of its beam's state.
            h=F.stack([h for i in range(topk)], axis=2).data
            c=F.stack([c for i in range(topk)], axis=2).data
            # Candidate routes: the existing prefix plus each continuation.
            pred_total_city = np.tile(route[:,:,:j],(1,1,topk)).reshape(batchsize,beam_width,topk,j)
            pred_next_city = pred_next_city.reshape(batchsize,beam_width,topk,1)
            pred_total_city = np.concatenate((pred_total_city,pred_next_city),axis=3)
            pred_total_score = np.tile(pred_total_score.reshape(batchsize,beam_width,1),(1,1,topk)).reshape(batchsize,beam_width,topk,1)
            pred_next_score = pred_next_score.reshape(batchsize,beam_width,topk,1)
            pred_total_score += pred_next_score
            # Rank all beam_width * topk candidates; idx // topk is the source
            # beam and idx % topk the candidate within that beam.
            idx = pred_total_score.reshape(batchsize,beam_width *
                topk).argsort(axis=1)[:,::-1][:,:beam_width]
            # The fancy index yields a (batch, batch, ...) result; the
            # diagonal picks each batch item's own selection.
            pred_total_city = pred_total_city[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,j+1)
            pred_total_score = pred_total_score[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,1)
            h = h[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,-1)
            c = c[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,-1)
            route[:,:,:j+1] =pred_total_city
            # Stop once every beam of every item emitted token id 15
            # (presumably the end-of-sequence id -- TODO confirm).
            if (pred_total_city[:,:,j] == 15).all():
                break
    return route[:,0,:j+1].tolist()
def __call__(self, x):
    """Run the shared children, then stack the outputs of the remaining ones.

    The first ``self.n_deconvolutions`` children are applied in sequence with
    a ReLU after each. Every later child is applied to the resulting tensor
    and its output is collected.

    Args:
        x: Input passed to the first child link.

    Returns:
        ``F.stack`` of the outputs of the children after the shared ones.
    """
    heads = []
    for position, child in enumerate(self.children()):
        produced = child(x)
        if position >= self.n_deconvolutions:
            heads.append(produced)
            continue
        x = F.relu(produced)
    return F.stack(heads)
def __call__(self, x):
    """Apply four ReLU layers, then every task head, and stack the results.

    Args:
        x: Input to ``self.l1``.

    Returns:
        Outputs of ``self.task_0`` .. ``self.task_{n_task - 1}`` applied to
        the shared features, stacked along axis 1.
    """
    features = x
    for layer_name in ('l1', 'l2', 'l3', 'l4'):
        features = F.relu(getattr(self, layer_name)(features))

    per_task = []
    for task_index in range(self.n_task):
        head = getattr(self, 'task_{}'.format(task_index))
        per_task.append(head(features))
    return F.stack(per_task, axis=1)
def __call__(self, WqUqj, u_qs, hps, put_zero=False):
    """Compute two rounds of attention scores over the passage states.

    A question summary ``rQ`` is built from ``u_qs`` with attention weights
    derived from ``WqUqj`` and stored as the initial state of
    ``self.W_f_gru``. Two scoring rounds over ``hps`` follow; after each
    round the attention-weighted passage vector is fed to ``self.W_f_gru``
    to update its state.

    Args:
        WqUqj: Projected question states; ``WqUqj.shape[1]`` is used as the
            number of question positions.
        u_qs: Sequence of question states, stacked with ``F.stack``.
        hps: Sequence of passage states; each is passed through
            ``self.Wp_h``.
        put_zero: When true, the second round's scores are multiplied by a
            mask that zeroes every position before the first round's argmax.

    Returns:
        list: The two score tensors (pre-softmax), one per round.
    """
    # differ from paper p5: eq(11)
    # s_j = F.batch_matmul(
    #     F.tanh(WqUqj + F.stack([self.W_vQVQ.W] * batch_size, axis = 0)),
    #     F.broadcast_to(self.W_v.W, [batch_size, self.W_v.W.shape[1]]),
    # ).reshape(batch_size, WqUqj.shape[1])
    # it correspond to V_r^Q = zeros() in eq(11)
    s_j = F.batch_matmul(
        F.tanh(WqUqj),
        F.broadcast_to(self.W_v.W,
                       [self.batch_size, self.W_v.W.shape[1]]),
    ).reshape(self.batch_size, WqUqj.shape[1])
    a_i = F.softmax(s_j)
    rQ = F.batch_matmul(F.stack(u_qs), a_i, transa=True).reshape(
        self.batch_size, u_qs[0].shape[1])
    self.W_f_gru.h = rQ

    WpHp = F.stack([self.Wp_h(hp) for hp in hps])
    hta_new_b_list = []
    for index in range(2):
        s_tj = F.batch_matmul(
            F.tanh(WpHp + F.stack([self.W_f_gru.h] * WpHp.shape[1], axis=1)),
            F.broadcast_to(self.W_v.W,
                           [self.batch_size, self.W_v.W.shape[1]]),
        ).reshape(self.batch_size, WpHp.shape[1])

        if put_zero and index == 1:
            # ``pt`` holds the first round's argmax per row. Build a mask
            # with the same shape as the scores and clear everything before
            # it. (The previous code called ``np.ones()`` without a shape
            # and indexed the mask with an undefined name.)
            # NOTE(review): the mask is a NumPy array; if the scores live on
            # a GPU this multiplication will need the matching array module.
            mask = np.ones(s_tj.shape, dtype=s_tj.dtype)
            for r_index, pt_index in enumerate(pt.data):
                mask[r_index, :int(pt_index)] = 0
            s_tj = s_tj * Variable(mask)

        hta_new_b_list.append(s_tj)
        at = F.softmax(s_tj)
        pt = F.argmax(at, axis=1)
        # NOTE(review): ``hps`` is iterated as a sequence above but passed
        # unstacked here, unlike ``F.stack(u_qs)`` for ``rQ`` -- confirm the
        # caller passes something ``F.batch_matmul`` accepts.
        ct = F.batch_matmul(hps, at, transa=True).reshape(
            self.batch_size, self.n_units)
        hta_new = self.W_f_gru(ct)
    return hta_new_b_list
def predict(self, images, return_visual_backprop=False):
    """Run the model in non-training mode and return flattened predictions.

    Args:
        images: Either an array batch or a list of images; a list is
            converted with ``self.xp.array`` and stacked along axis 0.
        return_visual_backprop: When true, also compute a visual-backprop
            map from ``self.visual_backprop_anchors[0]``, creating
            ``self.visual_backprop`` on first use.

    Returns:
        tuple: ``(rois, bboxes, visual_backprop)``; the last item is ``None``
        unless ``return_visual_backprop`` is set.
    """
    def _stack_and_flatten(items):
        # Stack on a new axis 1, then merge the first two axes.
        stacked = F.stack(items, axis=1)
        return F.reshape(stacked, (-1, ) + stacked.shape[2:])

    with chainer.using_device(self.device):
        if isinstance(images, list):
            as_arrays = [self.xp.array(image) for image in images]
            images = self.xp.stack(as_arrays, axis=0)

        visual_backprop = None
        with chainer.using_config('train', False):
            roi, bbox = self(images)
            if return_visual_backprop:
                if not hasattr(self, 'visual_backprop'):
                    self.visual_backprop = VisualBackprop()
                anchor = self.visual_backprop_anchors[0]
                visual_backprop = \
                    self.visual_backprop.perform_visual_backprop(anchor)

        bboxes = _stack_and_flatten([bbox])
        rois = _stack_and_flatten([roi])

    return rois, bboxes, visual_backprop
def __call__(self, *args):
    """Computes the combined multi-task loss for an input and label pair.

    It also computes per-task accuracy and stores the last one to the
    ``accuracy`` attribute.

    The predictor output is separated along axis 2 and the labels along
    axis 1; entry ``i`` of each is handed to task ``i``'s ``loss_fun`` (and
    ``acc_fun`` when ``self.compute_accuracy`` is set). The per-task losses
    are stacked and merged by ``self.multitask``.

    Args:
        args (list of ~chainer.Variable): Input minibatch. The all elements
            of ``args`` but last one are features and the last element
            corresponds to ground truth labels. It feeds features to the
            predictor and compare the result with ground truth labels.

    Returns:
        ~chainer.Variable: Loss value.

    """
    assert len(args) >= 2
    x = args[:-1]
    t = args[-1]
    self.y = None
    self.loss = None
    self.losses = []
    self.accuracy = None

    self.y = self.predictor(*x)
    xp = cuda.get_array_module(*x)
    # The combined loss is reported once, after it has been computed below.
    # (It used to be reported here as well, where it was always ``None``.)

    Y = F.array.separate.separate(self.y, 2)
    T = F.array.separate.separate(t, 1)
    for i, task in enumerate(self.tasks):
        y = Y[i]
        t = T[i]
        # The separated slices are made contiguous before the loss call.
        y.data = xp.ascontiguousarray(y.data)
        t.data = xp.ascontiguousarray(t.data)
        self.losses.append(task['loss_fun'](y, t, use_cudnn=True))
        reporter.report({'loss/' + task['name']: self.losses[i]}, self)
        if self.compute_accuracy:
            # Overwritten per task; only the last task's value is kept.
            self.accuracy = task['acc_fun'](y, t)
            reporter.report(
                {'accuracy/' + task['name']: self.accuracy}, self)

    self.losses = F.stack(self.losses)
    self.loss = self.multitask(self.losses)
    reporter.report({'loss': self.loss}, self)
    return self.loss
def forward(self, *args, **kwargs):
    """Compute the loss, pulling the label out of the call arguments.

    ``self.label_key`` selects the label: an ``int`` indexes the positional
    arguments, a ``str`` names a keyword argument. The selected item is
    removed and everything else is forwarded to ``self.predictor``.

    The loss divided by ``self.batch_size`` is reported only when it lies
    strictly between -1000 and 1000; otherwise a message is printed. When
    ``self.compute_accuracy`` is set, the mean of ``_wer`` over the batch is
    reported under ``'accuracy'`` and some debug output is printed.

    Raises:
        ValueError: If an integer key is out of range or a string key is
            missing from the keyword arguments.

    Returns:
        The value returned by ``self.lossfun``.
    """
    key = self.label_key
    if isinstance(key, int):
        in_range = -len(args) <= key < len(args)
        if not in_range:
            raise ValueError('Label key %d is out of bounds' % key)
        t = args[key]
        # -1 needs its own case: ``args[key + 1:]`` would be ``args[0:]``.
        args = args[:-1] if key == -1 else args[:key] + args[key + 1:]
    elif isinstance(key, str):
        if key not in kwargs:
            raise ValueError('Label key "%s" is not found' % key)
        t = kwargs.pop(key)

    self.y = None
    self.loss = None
    self.accuracy = None
    self.y = self.predictor(*args, **kwargs)
    self.loss = self.lossfun(self.y, t)

    per_item = self.loss.data / self.batch_size
    if -1000 < per_item < 1000:
        reporter.report({'loss': self.loss / self.batch_size}, self)
        # reporter.report({'char_loss': char_loss}, self)
    else:
        print('loss f****d up!!!!!!!!!!!!!')

    if self.compute_accuracy:
        def _decode(frames):
            # Best class per frame, blanks removed, as plain ints.
            best = remove_blank(F.argmax(frames, axis=1).data)
            return [int(symbol) for symbol in best]

        stacked = F.stack(self.y[0], 1)
        ys = [y.data[:n] for y, n in zip(stacked, self.y[1])]
        target = to_device(-1, t)
        print(len(ys[0]), len(target[0]))
        print(_decode(ys[0]))
        print(target[0])

        wer = 0
        for frames, reference in zip(ys, target):
            wer += _wer(_decode(frames), reference)
        reporter.report({'accuracy': wer / len(ys)}, self)

    return self.loss
def seq_rnn_embed(exs, birnn, init_state=None, return_sequences: bool = False):
    """Embed given sequences using rnn.

    All leading axes of ``exs`` are flattened into one, the resulting rows
    are separated and passed to ``birnn(init_state, rows)``, and the result
    is reshaped back onto the original leading axes.

    Args:
        exs: Input with at least two trailing axes; per the original notes
            its shape is ``(..., S, E)``.
        birnn: Callable returning ``(hs, ys)``.
        init_state: Forwarded to ``birnn`` as its first argument.
        return_sequences: If true return the per-step outputs, otherwise the
            final states.

    Returns:
        Reshaped per-step outputs or final states (see above).
    """
    lead_with_steps = exs.shape[:-1]
    lead_only = exs.shape[:-2]

    # exs.shape == (..., S, E)
    flat = F.reshape(exs, (-1, ) + exs.shape[-2:])  # (X, S, E)
    rows = F.separate(flat, 0)  # X x [(S1, E), (S2, E), ...]
    # (2, X, E), X x [(S1, 2*E), (S2, 2*E), ...]
    final_states, step_outputs = birnn(init_state, rows)

    if not return_sequences:
        swapped = F.moveaxis(final_states, 0, -2)  # (X, 2, E)
        return F.reshape(swapped, lead_only + (-1, ))  # (..., 2*E)

    stacked = F.stack(step_outputs)  # (X, S, 2*E)
    return F.reshape(stacked, lead_with_steps + (-1, ))  # (..., S, 2*E)
def prepare_images(self, images):
    """Apply ``resnet.prepare`` to every image of a batch.

    When ``self.xp`` is not NumPy the batch is first copied to device ``-1``
    and the converted result is copied back to the device the input came
    from.

    Args:
        images: Batch variable; separated along axis 0 into single images.

    Returns:
        The converted images stacked along axis 0.
    """
    off_host = self.xp != np
    if off_host:
        source_device = images.data.device
        images = F.copy(images, -1)

    prepared = []
    for single in F.separate(images, axis=0):
        prepared.append(resnet.prepare(single.data, size=None))
    batch = F.stack(prepared, axis=0)

    if off_host:
        batch = F.copy(batch, source_device.id)
    return batch
def get_transform_params(self, features):
    """Predict one 2x3 transform per box from split feature slices.

    The output of ``self.pre_transform_params`` is split into
    ``self.num_bboxes_to_localize`` sections along axis 1; each section is
    fed to ``self.lstm``. The stacked LSTM outputs are flattened over the
    first two axes, passed through ``self.param_predictor``, reshaped to
    ``(-1, 2, 3)`` and run through ``rotation_dropout``.

    Args:
        features: Input to ``self.pre_transform_params``.

    Returns:
        The transform parameters after ``rotation_dropout``.
    """
    projected = self.pre_transform_params(features)
    sections = F.split_axis(projected, self.num_bboxes_to_localize, axis=1)

    per_box = []
    for section in sections:
        per_box.append(self.lstm(section))
    stacked = F.stack(per_box, axis=1)

    # Unpacking doubles as a check that the stacked output has three axes.
    _batch, _boxes, _width = stacked.shape
    flat = F.reshape(stacked, (-1, ) + stacked.shape[2:])

    raw = self.param_predictor(flat)
    matrices = F.reshape(raw, (-1, 2, 3))
    return rotation_dropout(matrices, ratio=self.dropout_ratio)
def __call__(self, global_features, bboxes):
    """Pool ``global_features`` with one normalised gaussian per box.

    For every box a gaussian is obtained from ``self.get_gaussian`` using
    ``self.global_shapes`` and divided by the sum of its values (plus
    ``1e-15``). The gaussians are stacked, reshaped to the first two axes of
    ``global_features`` with a trailing singleton, broadcast to the full
    feature shape and used as weights for a sum over axis 1.

    Args:
        global_features: Features to pool.
        bboxes: Sized sequence of boxes, one per gaussian.

    Returns:
        ``global_features`` weighted by the gaussians and summed over axis 1.
    """
    shapes = [self.global_shapes] * len(bboxes)
    xp = cuda.get_array_module(self.x_var.data)

    weights = []
    for shape, box in zip(shapes, bboxes):
        bump = self.get_gaussian(shape, box)
        total = xp.sum(bump.data) + 1e-15
        weights.append(bump / total)

    stacked = F.stack(weights, axis=0)
    column = F.reshape(
        stacked, (global_features.shape[0], global_features.shape[1], 1))
    expanded = F.broadcast_to(column, global_features.shape)
    return F.sum(global_features * expanded, axis=1)
def get_transform_params(self, features):
    """Predict one 2x3 transform per box from globally pooled features.

    The pooled features are fed to ``self.lstm`` once per box
    (``self.num_bboxes_to_localize`` times). The stacked outputs are
    flattened over the first two axes, passed through
    ``self.param_predictor``, reshaped to ``(-1, 2, 3)`` and run through
    ``rotation_dropout``.

    Args:
        features: Input to ``_global_average_pooling_2d``.

    Returns:
        The transform parameters after ``rotation_dropout``.
    """
    pooled = _global_average_pooling_2d(features)

    per_box = []
    for _ in range(self.num_bboxes_to_localize):
        # Same input every time; the LSTM is called once per box.
        per_box.append(self.lstm(pooled))
    stacked = F.stack(per_box, axis=1)

    # Unpacking doubles as a check that the stacked output has three axes.
    _batch, _boxes, _width = stacked.shape
    flat = F.reshape(stacked, (-1, ) + stacked.shape[2:])

    raw = self.param_predictor(flat)
    matrices = F.reshape(raw, (-1, 2, 3))
    return rotation_dropout(matrices, ratio=self.dropout_ratio)
def compute_trajectory_loss(self, trajectory, logprobs, values):
    """Compute the loss for a single trajectory.

    For every start step a window of at most ``self._rollout_length`` steps
    is passed to ``self._rollout_consistency``; the squared policy and value
    terms are summed and halved. The policy part is divided by ``self._tau``
    and both parts are scaled by their coefficients, optionally divided by
    the number of steps.

    Args:
        trajectory -- the trajectory
        logprobs -- the log probabilities of the actions taken
        values -- the computed per-state values

    Returns the per-trajectory losses
    """
    rewards = self._xp.array(trajectory.values.rewards)
    steps = len(trajectory) - 1

    squared_policy = []
    squared_value = []
    for begin in range(steps):
        finish = min(begin + self._rollout_length, steps)
        policy_term, value_term = self._rollout_consistency(
            logprobs[begin:finish],
            rewards[begin:finish],
            values[begin],
            values[finish],
            finish - begin)
        squared_policy.append(policy_term**2)
        squared_value.append(value_term**2)

    policy_loss = F.sum(F.stack(squared_policy)) / 2
    policy_loss = policy_loss / self._tau
    policy_loss = policy_loss * self._policy_loss_coefficient

    value_loss = F.sum(F.stack(squared_value)) / 2
    value_loss = value_loss * self._value_loss_coefficient

    if self._normalize_loss_by_steps:
        policy_loss = policy_loss / steps
        value_loss = value_loss / steps

    return policy_loss + value_loss
def euler2mat(r, xp=np):
    """Converts euler angles to rotation matrix

    The angles are clipped to ``[-pi, pi]`` first. One matrix per axis is
    built and the result is ``(X @ Y) @ Z``.

    Args:
        r: rotation angle(x, y, z). Shape is (N, 3).
        xp: Array module used for the constant 0/1 entries.

    Returns:
        Rotation matrix corresponding to the euler angles.
        Shape is (N, 3, 3).
    """
    batchsize = r.shape[0]
    zeros = xp.zeros((batchsize), dtype='f')
    ones = xp.ones((batchsize), dtype='f')

    r = F.clip(r, -np.pi, np.pi)
    cos_r = F.cos(r)
    sin_r = F.sin(r)
    cos_x, cos_y, cos_z = cos_r[:, 0], cos_r[:, 1], cos_r[:, 2]
    sin_x, sin_y, sin_z = sin_r[:, 0], sin_r[:, 1], sin_r[:, 2]

    def _matrix(row_major_entries):
        # Nine per-sample entries in row-major order -> (N, 3, 3).
        flat = F.stack(row_major_entries, axis=1)
        return flat.reshape(batchsize, 3, 3)

    zmat = _matrix([
        cos_z, -sin_z, zeros,
        sin_z, cos_z, zeros,
        zeros, zeros, ones,
    ])
    ymat = _matrix([
        cos_y, zeros, sin_y,
        zeros, ones, zeros,
        -sin_y, zeros, cos_y,
    ])
    xmat = _matrix([
        ones, zeros, zeros,
        zeros, cos_x, -sin_x,
        zeros, sin_x, cos_x,
    ])

    # z --> y --> x
    xy = F.batch_matmul(xmat, ymat)
    return F.batch_matmul(xy, zmat)
def approximate_cost(x, u, Cf):
    """Approximate the cost function at point (x, u), step by step.

    For every time step the state and control are concatenated into ``tau``,
    the cost is evaluated, its gradient with respect to ``tau`` is taken with
    double backprop enabled, and the Hessian is assembled one column at a
    time by differentiating each summed gradient component again. The stored
    gradient has the Hessian-vector product ``bmv(hessian, tau)`` subtracted.

    :param x: time batch n_state
    :param u: time batch n_ctrl
    :param Cf: Cost Function need map vector to scalar
    :return: hessian, grads, costs
    """
    assert x.shape[0] == u.shape[0]
    assert x.shape[1] == u.shape[1]

    horizon = x.shape[0]
    tau = F.concat((x, u), axis=2)
    n_components = tau.shape[2]  # n_sc

    cost_steps, grad_steps, hessian_steps = [], [], []
    for step in range(horizon):
        tau_t = tau[step]
        cost = Cf(tau_t)  # value of cost function at tau
        assert list(cost.shape) == [x.shape[1]]

        grad = chainer.grad([F.sum(cost)], [tau_t],
                            enable_double_backprop=True)[0]

        # Second derivative: one column per component of tau.
        columns = []
        for component in range(n_components):
            summed = F.sum(grad[:, component])
            columns.append(chainer.grad([summed], [tau_t])[0])
        hessian = F.stack(columns, axis=-1)

        cost_steps.append(cost)
        # change to near 0?? Is this necessary ???
        grad_steps.append(grad - bmv(hessian, tau_t))
        hessian_steps.append(hessian)

    costs = F.stack(cost_steps)
    grads = F.stack(grad_steps)
    hessians = F.stack(hessian_steps)
    return hessians, grads, costs
def check_forward(self, xs_data):
    """Check ``functions.stack`` against NumPy and against its inputs.

    Args:
        xs_data: Arrays wrapped into variables and stacked along
            ``self.axis``.
    """
    variables = [chainer.Variable(array) for array in xs_data]
    y = functions.stack(variables, axis=self.axis)

    # run test only with numpy>=1.10
    if hasattr(numpy, 'stack'):
        testing.assert_allclose(
            y.data, numpy.stack(self.xs, axis=self.axis))

    on_cpu = backend.CpuDevice().send(y.data)
    self.assertEqual(on_cpu.shape[self.axis], 2)
    # Each slice along the stacking axis must equal the matching input.
    for position in (0, 1):
        numpy.testing.assert_array_equal(
            on_cpu.take(position, axis=self.axis), self.xs[position])
def test(self, test_iter):
    """returns a dictionary of buzzes

    For every batch the model is applied to each entry of ``batch.vecs``,
    the arg-max action is taken along the last axis, and for each question
    id the index of the first action equal to 1 is stored (``-1`` when all
    of its actions are falsy).
    """
    device = self.model.get_device()
    buzzes = {}
    for _ in range(test_iter.size):
        batch = test_iter.next_batch(self.model.xp)
        length, batch_size, _unused = batch.vecs.shape

        # length, batch, 2
        qvalues = []
        for vec in batch.vecs:
            qvalues.append(self.model(vec))
        # length, batch
        chosen = F.argmax(F.stack(qvalues), axis=2).data
        per_question = chosen.T.tolist()

        for qid, sequence in zip(batch.qids, per_question):
            key = qid.tolist()
            if any(sequence):
                buzzes[key] = sequence.index(1)
            else:
                buzzes[key] = -1
    return buzzes
def forward(self, ys, **kwargs):
    """Return the sum of the click and conversion mean-squared errors.

    Args:
        ys: Targets; after stacking, column 0 is compared with the click
            prediction and column 1 with the cv prediction.
        **kwargs: Forwarded to ``self.predict``.

    Returns:
        The summed loss. ``'loss'``, ``'loss_click'`` and ``'loss_cv'`` are
        reported as raw data.
    """
    pred_click, pred_cv = self.predict(**kwargs)

    targets = F.stack(ys)
    loss_click = F.mean_squared_error(pred_click, targets[:, 0])
    loss_cv = F.mean_squared_error(pred_cv, targets[:, 1])
    loss = loss_click + loss_cv

    observations = (
        ('loss', loss),
        ('loss_click', loss_click),
        ('loss_cv', loss_cv),
    )
    for key, value in observations:
        reporter.report({key: value.data}, self)
    return loss
def __call__(self, sequence_lists, pos_sequence_lists):
    """Encode groups of sequences and classify each group.

    Every sequence is embedded twice (``self.embeddings`` and
    ``self.pos_embeddings``), the two embeddings are concatenated along
    axis 1, and all sequences go through ``self.lstm`` together. Each
    encoded sequence is summarised by joining the first ``state_size``
    features of its last step with the remaining features of its first
    step. Summaries are max-pooled per group before the hidden and output
    layers.

    Args:
        sequence_lists: Groups of sequences for ``self.embeddings``.
        pos_sequence_lists: Matching groups for ``self.pos_embeddings``.

    Returns:
        Output of ``self.output``, one row per group.
    """
    word_vectors = []
    for group in sequence_lists:
        for sequence in group:
            word_vectors.append(self.embeddings(sequence))

    pos_vectors = []
    for group in pos_sequence_lists:
        for sequence in group:
            pos_vectors.append(self.pos_embeddings(sequence))

    joined = [
        F.concat(pair, axis=1) for pair in zip(word_vectors, pos_vectors)
    ]
    _, _, encoded = self.lstm(None, None, joined)

    # TODO: this could probably be done more efficiently with separate
    # LSTMs
    state_size = self.state_size
    summaries = []
    for steps in encoded:
        last_head = steps[-1, :state_size]
        first_tail = steps[0, state_size:]
        summaries.append(F.concat((last_head, first_tail), axis=0))
    encoded = F.stack(summaries, axis=0)

    pooled = []
    offset = 0
    for group in sequence_lists:
        stop = offset + len(group)
        pooled.append(F.max(encoded[offset:stop], axis=0))
        offset = stop
    pooled = F.stack(pooled, axis=0)

    hidden = F.dropout(F.relu(self.hidden(pooled)), 0.5)
    return self.output(hidden)
def differentiate(self, x, enable_double_backprop):
    """Calculate derivative of the output data w.r.t. input data.

    The first axis of ``self.results['y']`` is moved to the end, one
    gradient is taken per entry along the new first axis, and the gradients
    are stacked along axis 1 into ``self.results['dy']``.

    Args:
        x (~chainer.Variable): Input data which has the shape
            ``(n_sample, n_input)``.
        enable_double_backprop (bool): Passed to :func:`chainer.grad` to
            determine whether to create more deep calculation graph or not.

    """
    output_nodes = F.moveaxis(self.results['y'], 0, -1)

    gradients = []
    for node in output_nodes:
        grad_for_node, = chainer.grad(
            [node], [x], enable_double_backprop=enable_double_backprop)[:1]
        gradients.append(grad_for_node)

    self.results['dy'] = F.stack(gradients, axis=1)
def sentence_block_embed(embed, x):
    """Apply an embedding to a 2-D id array.

    Change implicitly embed_id function's target to ndim=2: the ids are
    flattened, embedded in one call, split back into ``batch`` chunks and
    transposed so the unit axis comes before the length axis.

    Args:
        embed: Callable applied to the flattened ids.
        x: Id array of shape ``(batchsize, sentence_length)``.

    Returns:
        Embedded block; per the original notes ``(batch, units, length)``.
    """
    batch, length = x.shape
    flat_ids = x.reshape((batch * length, ))
    embedded = embed(flat_ids)  # (batch * length, units)
    per_sentence = F.split_axis(embedded, batch, axis=0)
    block = F.stack(per_sentence, axis=0)
    # (batch, units, length)
    return F.transpose(block, (0, 2, 1))
def forward(self, xs):  # xs shape = (batch, T, F, D)
    '''
    Run the per-frame stage, then the per-node recurrent stage.

    With ``SpatialEdgeMode.all_edge`` every frame is fed to
    ``self.space_lstm``; otherwise the flattened input goes through
    ``self.transfer_dim_fc``. The result is reshaped to
    ``(batch, T, frame_node_num, mid_size)`` and handed to
    ``self.node_recurrent_forward``, whose outputs are stacked in ascending
    integer key order.

    :param xs: appearance features of all boxes feature across all frames
    :return: node outputs; per the original notes shape is (B, T, F, D)
    '''
    xp = chainer.cuda.get_array_module(xs.data)
    batch, T = xs.shape[0], xs.shape[1]
    dim = xs.shape[-1]

    # first frame node_id ==> other frame node_id in same corresponding box
    use_space_lstm = self.spatial_edge_mode == SpatialEdgeMode.all_edge
    if not use_space_lstm:
        flat = F.reshape(xs, shape=(-1, self.in_size))
        temporal_in = self.transfer_dim_fc(flat)
    else:
        # batch x T, F, D
        frames = F.reshape(xs, shape=(-1, self.frame_node_num, dim))
        frames = F.separate(frames, axis=0)
        _, _, space_out = self.space_lstm(None, None, list(frames))
        temporal_in = F.stack(space_out)  # batch * T, F, D

    # B, T, F, D
    temporal_in = F.reshape(
        temporal_in, (batch, T, self.frame_node_num, self.mid_size))

    # shape = F, B, T, mid_size
    node_out_dict = self.node_recurrent_forward(temporal_in)
    ordered = sorted(node_out_dict.items(), key=lambda e: int(e[0]))
    node_out = F.stack([output for _, output in ordered])
    node_out = F.transpose(node_out, (1, 2, 0, 3))  # shape = (B,T,F,D)

    assert self.frame_node_num == node_out.shape[2], node_out.shape[2]
    assert self.out_size == node_out.shape[-1]
    assert T == node_out.shape[1]
    return node_out
def __call__(self, previous_hidden, enc_states):
    """Return the attention context over the encoder states.

    Each encoder state is scored with
    ``V_a(tanh(W_a(previous_hidden) + U_a(state)))``. The scores are
    soft-maxed over the state axis and used to weight the stacked encoder
    states, which are then summed.

    Args:
        previous_hidden: Decoder state fed to ``self.W_a``.
        enc_states: Sequence of encoder states.

    Returns:
        The weighted sum of the encoder states over axis 1.
    """
    projected_query = self.W_a(previous_hidden)
    # util.trace('W_a calc hidden: {}'.format(weighted_hidden.shape))

    per_state_scores = []
    for enc_hidden in enc_states:
        energy = chainFunc.tanh(projected_query + self.U_a(enc_hidden))
        per_state_scores.append(self.V_a(energy))

    # Shape the scores as batch * source_length * 1 here.
    scores = chainFunc.stack(per_state_scores, axis=1)
    # util.trace('scores type; {}'.format(type(scores)))
    # util.trace('scores length; {}'.format(len(scores)))
    # util.trace('score type; {}'.format(type(scores[0])))
    # util.trace('scores shape: {}'.format(scores.shape))
    # util.trace('score shape; {}'.format(scores[0].shape))

    align = chainFunc.softmax(scores, axis=1)
    # util.trace('align shape: {}'.format(align.shape))
    stacked_states = chainFunc.stack(enc_states, axis=1)
    # util.trace('stacking encder state shape: {}'.format(stackenc_hidden.shape))
    weights = chainFunc.broadcast_to(align, stacked_states.shape)
    # util.trace('align cast shape: {}'.format(align_cast.shape))
    return chainFunc.sum(weights * stacked_states, axis=1)
def __call__(self, x):
    """Run the LSTM over a batch and classify each sample's last step.

    Args:
        x: Iterable of per-sample arrays; each is wrapped in ``Variable``.

    Returns:
        Output of ``self.fc`` applied to the last time step of every sample.
    """
    samples = [Variable(item) for item in x]
    hy, cy, ys = self.nsteplstm(hx=None, cx=None, xs=samples)

    # list of batches to a variable
    stacked = F.stack(ys)
    # The outputs of last timestep of samples in the batch
    # (batch, hidden_size)
    last_step = stacked[:, -1, :]
    return self.fc(last_step)
def forward(self, xin, targets):
    """Compute total loss to train.

    The main loss is the sigmoid cross entropy of the predictor output.
    The auxiliary loss is a softmax cross entropy between the predictor's
    logged raw attention and the flattened supports, weighted by ``STRONG``.

    Args:
        xin: Four-tuple ``(vctx, vq, va, supps)``.
        targets: Targets for the main prediction.

    Returns:
        ``mainloss + STRONG * oattloss``.
    """
    # (B, R, P, C), (B, Q), (B,), (B, I)
    vctx, vq, va, supps = xin

    # ---------------------------
    # Compute main loss
    predictions = self.predictor(xin)  # (B,)
    mainloss = F.sigmoid_cross_entropy(predictions, targets)  # ()
    acc = F.binary_accuracy(predictions, targets)  # ()

    # ---------------------------
    # Compute aux losses
    raw_attention = F.stack(self.predictor.log['raw_att'], 1)  # (B, I, R)
    attention_rows = F.reshape(raw_attention, (-1, vctx.shape[1]))  # (B*I, R)
    oattloss = F.softmax_cross_entropy(attention_rows, supps.flatten())  # ()

    # ---
    observation = {'loss': mainloss, 'oatt': oattloss, 'acc': acc}
    C.report(observation, self)
    return mainloss + STRONG * oattloss  # ()
def conn_recurrent_forward(self, node_out_dict):
    """Run every connection module on the outputs of its two nodes.

    Keys of ``self.top`` have the form ``"a,b"``. The outputs of nodes
    ``a`` and ``b`` are concatenated along axis 2, projected by
    ``self.conn_transform_dim_fc``, split per batch item and fed to the
    connection module.

    Args:
        node_out_dict: Mapping from node module id to that node's output.

    Returns:
        dict: Connection module id -> stacked module output.
    """
    # xs shape = (N,D)
    conn_out_dict = dict()
    for conn_id, conn_module in self.top.items():
        id_a, id_b = conn_id.split(",")
        out_a = node_out_dict[id_a]  # B, T, D
        out_b = node_out_dict[id_b]  # B, T, D

        paired = F.concat((out_a, out_b), axis=2)  # B, T, 2D
        batch_size, seq_len, dim = paired.shape
        projected = self.conn_transform_dim_fc(F.reshape(paired, (-1, dim)))
        projected = F.reshape(projected, (batch_size, seq_len, self.mid_size))

        per_item = list(F.separate(projected, axis=0))  # list of T, D
        conn_out_dict[conn_id] = F.stack(conn_module(per_item))  # B, T, D
    return conn_out_dict
def __call__(self, x_lst, mask=None):
    """Encode a list of sequences and return one output per batch item.

    The inputs are stacked, the position-encoding block (cut to the
    sequence length) is added, the encoder is applied, and every position
    is projected by ``self.final_linear``.

    Args:
        x_lst: Sequences to stack; per the original notes the stacked shape
            is ``(batch, T, D)``.
        mask: Forwarded to ``self.encoder``.

    Returns:
        list: One squeezed ``(len_q, out_size)`` entry per batch item.
    """
    stacked = F.stack(x_lst)
    batch, length, unit = stacked.shape
    positions = self.xp.array(self.position_encoding_block[:, :length, :])
    stacked = stacked + positions

    # self attention shape= batch x len_q x d_model
    encoded = self.encoder(stacked, mask)
    batch, len_q, d_model = encoded.shape

    projected = self.final_linear(
        F.reshape(encoded, (batch * len_q, d_model)))
    # shape = B, len_q, out_size
    projected = F.reshape(projected, (batch, len_q, self.out_size))

    # Split per batch item; each piece becomes [len_q, out_size].
    outputs = []
    for piece in F.split_axis(projected, 1, axis=0, force_tuple=True):
        outputs.append(F.squeeze(piece))
    return outputs
def __call__(self, xs, ts):
    """Return the weighted sum of the PIT loss and the DPCL loss.

    The DPCL loss is the sum of ``dc_loss`` over all sequences divided by
    the squared total number of frames. The two losses are mixed with
    ``self.dc_loss_ratio``.

    Args:
        xs: Inputs for ``self.forward``.
        ts: Targets, one per sequence.

    Returns:
        The combined loss; ``'loss_pit'``, ``'loss_dc'`` and ``'loss'`` are
        reported.
    """
    _, ys, ems = self.forward(xs)

    # PIT loss
    pit_loss, labels = batch_pit_loss(ys, ts)
    reporter.report({'loss_pit': pit_loss}, self)
    report_diarization_error(ys, labels, self)

    # DPCL loss
    per_sequence = []
    for embedding, target in zip(ems, ts):
        per_sequence.append(dc_loss(embedding, target))
    n_frames = np.sum([target.shape[0] for target in ts])
    loss_dc = F.sum(F.stack(per_sequence)) / (n_frames**2)
    reporter.report({'loss_dc': loss_dc}, self)

    # Multi-objective
    ratio = self.dc_loss_ratio
    loss = (1 - ratio) * pit_loss + ratio * loss_dc
    reporter.report({'loss': loss}, self)
    return loss
def _run_fwd_bwd(model, inputs):
    """Run one forward/backward pass and collect outputs and gradients.

    A list/tuple output is reduced to a scalar by summing the stacked
    outputs; a single output is used as the loss directly. The loss
    gradient is seeded with ones before calling ``backward``.

    Args:
        model: Model to call; its gradients are cleared first.
        inputs: Positional arguments for the model.

    Returns:
        tuple: ``(y, grads)`` where ``grads`` is a list of
        ``(name, gradient)`` pairs sorted by parameter name, with ``'/mc'``
        removed from the names.
    """
    model.cleargrads()
    result = model(*inputs)

    if not isinstance(result, (list, tuple)):
        loss = result
        y = result.array
    else:
        loss = F.sum(F.stack(result))
        y = [chainer.backend.to_chx(item.array) for item in result]

    loss.grad = model.xp.ones(loss.shape, loss.dtype)
    loss.backward()

    grads = [
        (name.replace('/mc', ''), chainer.backend.to_chx(param.grad))
        for name, param in sorted(model.namedparams())
    ]
    return y, grads
def calculate_all_attentions(self, hs, ys):
    '''Calculate all of attentions

    The targets are prefixed with ``sos`` / suffixed with ``eos`` and
    padded, then the decoder is stepped over every output position while
    the attention weights of each step are collected.

    :param hs: encoder states passed to the attention module
    :param ys: target id sequences
    :return: list of attentions
    '''
    # prepare input and output word sequences with sos/eos IDs
    eos = self.xp.array([self.eos], 'i')
    sos = self.xp.array([self.sos], 'i')
    ys_in = [F.concat([sos, y], axis=0) for y in ys]
    ys_out = [F.concat([y, eos], axis=0) for y in ys]

    # padding for ys with -1
    # pys: utt x olen
    pad_ys_in = F.pad_sequence(ys_in, padding=self.eos)
    pad_ys_out = F.pad_sequence(ys_out, padding=-1)

    # get length info
    olength = pad_ys_out.shape[1]

    # initialization: one (initially empty) cell/hidden state per layer
    extra_layers = [None for _ in six.moves.range(1, self.dlayers)]
    c_list = [None] + list(extra_layers)
    z_list = [None] + list(extra_layers)
    att_w = None
    att_ws = []
    self.att.reset()  # reset pre-computation of h

    # pre-computation of embedding
    eys = self.embed(pad_ys_in)  # utt x olen x zdim
    eys = F.separate(eys, axis=1)

    # loop for an output sequence
    for step in six.moves.range(olength):
        att_c, att_w = self.att(hs, z_list[0], att_w)
        ey = F.hstack((eys[step], att_c))  # utt x (zdim + hdim)
        c_list[0], z_list[0] = self.lstm0(c_list[0], z_list[0], ey)
        for layer in six.moves.range(1, self.dlayers):
            c_list[layer], z_list[layer] = self['lstm%d' % layer](
                c_list[layer], z_list[layer], z_list[layer - 1])
        att_ws.append(att_w)  # for debugging

    stacked = F.stack(att_ws, axis=1)
    stacked.to_cpu()
    return stacked.data
def inverse(self, y):
    """Rearrange channel blocks of ``y`` back into spatial positions.

    The channel count must be divisible by ``scale ** 2``. The output has
    ``scale ** 2`` times fewer channels and ``scale`` times the height and
    width.

    Args:
        y: Four-axis input unpacked as (batch, channels, height, width).

    Returns:
        Tensor of shape ``(batch, channels // scale**2, height * scale,
        width * scale)``.
    """
    scale = self.scale
    block = scale * scale
    batch, in_channels, in_height, in_width = y.shape
    assert (in_channels % block == 0)
    out_channels = in_channels // block
    out_height = in_height * scale
    out_width = in_width * scale

    # Channels last, then expose the scale*scale block as its own axis.
    channels_last = F.transpose(y, axes=(0, 2, 3, 1))
    grouped = channels_last.reshape(
        batch, in_height, in_width, block, out_channels)

    pieces = F.split_axis(
        grouped, indices_or_sections=(grouped.shape[3] // scale), axis=3)
    widened = []
    for piece in pieces:
        widened.append(
            piece.reshape(batch, in_height, out_width, out_channels))

    stacked = F.stack(widened, axis=0)
    batch_first = F.swapaxes(stacked, axis1=0, axis2=1)
    interleaved = F.transpose(batch_first, axes=(0, 2, 1, 3, 4))
    spatial = interleaved.reshape(batch, out_height, out_width, out_channels)
    return F.transpose(spatial, axes=(0, 3, 1, 2))
def get_points_from_angles(distance, elevation, azimuth, degrees=True):
    """Convert distance / elevation / azimuth to Cartesian coordinates.

    A Python ``float`` or ``int`` distance takes the scalar path and uses
    :mod:`math`; anything else takes the array path and uses ``cf``.

    Args:
        distance: Radius, scalar or array-like.
        elevation: Elevation angle(s).
        azimuth: Azimuth angle(s).
        degrees: When true the angles are converted from degrees to radians
            first.

    Returns:
        Scalar path: a tuple
        ``(d*cos(e)*sin(a), d*sin(e), -d*cos(e)*cos(a))``.
        Array path: the same three components stacked and transposed.
    """
    if not isinstance(distance, (float, int)):
        if degrees:
            elevation = radians(elevation)
            azimuth = radians(azimuth)
        x_component = distance * cf.cos(elevation) * cf.sin(azimuth)
        y_component = distance * cf.sin(elevation)
        z_component = -distance * cf.cos(elevation) * cf.cos(azimuth)
        return cf.stack([x_component, y_component, z_component]).transpose()

    if degrees:
        elevation = math.radians(elevation)
        azimuth = math.radians(azimuth)
    x_component = distance * math.cos(elevation) * math.sin(azimuth)
    y_component = distance * math.sin(elevation)
    z_component = -distance * math.cos(elevation) * math.cos(azimuth)
    return (x_component, y_component, z_component)
def __call__(self, x):
    """Fold spatial blocks of ``x`` into the channel axis.

    The height must be divisible by ``self.scale``.

    Args:
        x: Four-axis input unpacked as (batch, channels, height, width).

    Returns:
        The rearranged tensor with ``channels * scale ** 2`` channels.
    """
    scale = self.scale
    batch, in_channels, in_height, in_width = x.shape
    out_channels = in_channels * scale * scale
    assert (in_height % scale == 0)
    out_height = in_height // scale

    channels_last = F.transpose(x, axes=(0, 2, 3, 1))
    pieces = F.split_axis(
        channels_last,
        indices_or_sections=(channels_last.shape[2] // scale),
        axis=2)

    folded = []
    for piece in pieces:
        folded.append(piece.reshape(batch, out_height, out_channels))

    stacked = F.stack(folded, axis=1)
    return F.transpose(stacked, axes=(0, 3, 2, 1))
def batch_skew(vec, batch_size=None):
    """Build the skew-symmetric matrix of every 3-vector in a batch.

    vec is N x 3, batch_size is int

    The six off-diagonal entries of each flattened 3x3 matrix (flat
    positions 1, 2, 3, 5, 6, 7) are filled with
    ``-z, y, z, -x, -y, x``; the diagonal stays zero. Values are written
    through ``.data`` into a freshly created variable.

    returns N x 3 x 3. Skew_sym version of each matrix.
    """
    xp = vec.xp
    if batch_size is None:
        batch_size = vec.shape[0]

    # Flat positions of the off-diagonal entries inside one 3x3 matrix.
    col_inds = xp.array([1, 2, 3, 5, 6, 7])
    within_matrix = F.repeat(col_inds.reshape(1, -1), batch_size, axis=0)
    # Each matrix occupies nine consecutive slots of the flat buffer.
    matrix_start = F.reshape(xp.arange(0, batch_size) * 9, [-1, 1])
    matrix_offset = F.repeat(matrix_start, 6, axis=1)
    indices = F.reshape(within_matrix + matrix_offset, [-1, 1])

    vx, vy, vz = vec[:, 0], vec[:, 1], vec[:, 2]
    entries = F.stack([-vz, vy, vz, -vx, -vy, vx], axis=1)
    updates = F.reshape(entries, [-1])

    res = Variable(xp.zeros((batch_size * 3 * 3), 'f'))
    flat_positions = indices.reshape(-1).data
    res.data[flat_positions] = updates.data
    return F.reshape(res, [batch_size, 3, 3])
def __call__(self, h):
    # type: (chainer.Variable) -> chainer.Variable
    """Run one attention read-out step over the node features.

    ``self.q_star`` is initialised with zeros on first use, fed to
    ``self.lstm_layer`` to obtain a query, and replaced by the query
    concatenated with the attention-weighted sum of ``h``.

    Args:
        h: Node features unpacked as ``(mb, node, ch)``.

    Returns:
        The new ``q_star`` reshaped to ``(mb, ch * 2)``.
    """
    xp = cuda.get_array_module(h)
    mb, node, ch = h.shape  # type: int, int, int

    if self.q_star is None:
        initial = []
        for _ in range(mb):
            initial.append(
                xp.zeros((1, self.in_channels * 2)).astype('f'))
        self.q_star = initial

    self.hx, self.cx, query_list = self.lstm_layer(
        self.hx, self.cx, self.q_star)
    # self.hx: (mb, mb, ch)
    # self.cx: (mb, mb, ch)
    # q: List[(1, ch) * mb]
    query = functions.stack(query_list)
    # q: (mb, 1, ch)
    query_t = functions.transpose(query, axes=(0, 2, 1))
    # q_: (mb, ch, 1)
    energy = functions.matmul(h, query_t)
    # e: (mb, node, 1)
    attention = functions.softmax(energy)
    # a: (mb, node, 1)
    attention = functions.broadcast_to(attention, h.shape)
    # a: (mb, node, ch)
    readout = functions.sum((attention * h), axis=1, keepdims=True)
    # r: (mb, 1, ch)
    joined = functions.concat((query, readout), axis=2)
    # q_star_: (mb, 1, ch*2)
    self.q_star = functions.separate(joined)
    return functions.reshape(joined, (mb, ch * 2))
def func(*xs):
    """Stack all positional inputs along ``self.axis``.

    ``self`` is not a parameter here; it is resolved from the scope in which
    this function is defined.
    """
    stacked = functions.stack(xs, self.axis)
    return stacked
def batch_global_rigid_transformation(Rs, Js, parent, rotate_base=False):
    """
    Computes absolute joint locations given pose.

    rotate_base: if True, rotates the global rotation by 90 deg in x axis.
    if False, this is the original SMPL coordinate.

    Each joint's 4x4 transform is its parent's transform multiplied by the
    joint's local transform (rotation plus offset from the parent), so the
    parent of joint ``i`` must appear before ``i`` in ``parent``.

    Args:
      Rs: N x 24 x 3 x 3 rotation vector of K joints
      Js: N x 24 x 3, joint locations before posing
      parent: 24 holding the parent id for each index

    Returns
      new_J : N x 24 x 3 location of absolute joints
      A     : N x 24 x 4 x 4 relative joint transformations for LBS.
    """
    xp = Rs.xp
    N = Rs.shape[0]
    if rotate_base:
        print('Flipping the SMPL coordinate frame!!!!')
        # Build the flip as a plain array and broadcast it over the batch.
        # (``Variable`` takes no ``dtype`` argument and ``F.tile`` rejects a
        # list of repetitions, so the previous construction raised.)
        rot_x = xp.array(
            [[1, 0, 0], [0, -1, 0], [0, 0, -1]], dtype=Rs.dtype)
        rot_x = F.broadcast_to(rot_x, (N, 3, 3))
        root_rotation = F.matmul(Rs[:, 0, :, :], rot_x)
    else:
        root_rotation = Rs[:, 0, :, :]

    # Now Js is N x 24 x 3 x 1
    Js = F.expand_dims(Js, -1)

    def make_A(R, t, name=None):
        # Rs is N x 3 x 3, ts is N x 3 x 1
        # Pad a zero row under R and a one under t, then join the columns
        # into a homogeneous N x 4 x 4 transform.
        R_homo = F.pad(R, [[0, 0], [0, 1], [0, 0]], 'constant')
        t_homo = F.concat([t, xp.ones([N, 1, 1], 'f')], 1)
        return F.concat([R_homo, t_homo], 2)

    A0 = make_A(root_rotation, Js[:, 0])
    results = [A0]
    for i in range(1, parent.shape[0]):
        j_here = Js[:, i] - Js[:, parent[i]]
        A_here = make_A(Rs[:, i], j_here)
        res_here = F.matmul(results[parent[i]], A_here)
        results.append(res_here)

    # N x 24 x 4 x 4
    results = F.stack(results, axis=1)

    new_J = results[:, :, :3, 3]

    # --- Compute relative A: Skinning is based on
    # how much the bone moved (not the final location of the bone)
    # but (final_bone - init_bone)
    # ---
    Js_w0 = F.concat([Js, xp.zeros([N, Js.shape[1], 1, 1], 'f')], 2)
    init_bone = F.matmul(results, Js_w0)
    # Append empty 4 x 3:
    init_bone = F.pad(
        init_bone, [[0, 0], [0, 0], [0, 0], [3, 0]], 'constant')
    A = results - init_bone

    # Return the relative transforms, as documented; the absolute ones were
    # returned before, leaving ``A`` unused.
    return new_J, A
def __call__(self, beta, theta, get_skin=False, with_a=False):
    """Pose the template mesh and regress joints from it.

    Steps: add shape blend shapes, regress rest-pose joints, add pose blend
    shapes, compute per-joint transforms with
    ``batch_global_rigid_transformation``, blend them per vertex with the
    skinning weights, and regress the output joints from the posed
    vertices.

    Intermediate results are kept on the instance (``beta_shapedirs``,
    ``v_shaped``, ``J``, ``Rs``, ``pose_feature``, ``J_transformed``).

    Args:
        beta: Shape coefficients; ``beta.shape[0]`` is the batch size.
        theta: Pose parameters, reshaped to rows of three before
            ``batch_rodrigues``.
        get_skin: Accepted for interface compatibility; not read here.
        with_a: Accepted for interface compatibility; not read here.

    Returns:
        tuple: ``(verts, joints, Rs, A)``.
    """
    batch_size = beta.shape[0]

    # 1. Add shape blend shapes
    # (N x 10) x (10 x 6890*3) = N x 6890 x 3
    # The product is computed once and reused for ``v_shaped``.
    self.beta_shapedirs = F.matmul(beta, self.shapedirs)
    v_shaped = F.reshape(
        self.beta_shapedirs, [-1, self.size[0], self.size[1]]) + \
        F.repeat(self.v_template[None, ], batch_size, axis=0)
    self.v_shaped = v_shaped

    # 2. Infer shape-dependent joint locations.
    Jx = F.matmul(v_shaped[:, :, 0], self.J_regressor)
    Jy = F.matmul(v_shaped[:, :, 1], self.J_regressor)
    Jz = F.matmul(v_shaped[:, :, 2], self.J_regressor)
    J = F.stack([Jx, Jy, Jz], axis=2)
    self.J = J

    # 3. Add pose blend shapes
    # N x 24 x 3 x 3
    Rs = F.reshape(
        batch_rodrigues(F.reshape(theta, [-1, 3])), [-1, 24, 3, 3])
    self.Rs = Rs
    # Ignore global rotation.
    identity = Variable(self.xp.array(self.xp.eye(3), 'f'))
    identities = F.repeat(
        F.repeat(identity[None, ], 23, axis=0)[None, ], batch_size, axis=0)
    pose_feature = F.reshape(Rs[:, 1:, :, :] - identities, [-1, 207])
    self.pose_feature = pose_feature

    # (N x 207) x (207, 20670) -> N x 6890 x 3
    v_posed = F.reshape(
        F.matmul(pose_feature, self.posedirs),
        [-1, self.size[0], self.size[1]]) + v_shaped

    # 4. Get the global joint location
    self.J_transformed, A = batch_global_rigid_transformation(
        Rs, J, self.parents)

    # 5. Do skinning:
    # W is N x 6890 x 24
    W = F.reshape(
        F.tile(self.weights, (batch_size, 1)), [batch_size, -1, 24])
    # (N x 6890 x 24) x (N x 24 x 16)
    T = F.reshape(
        F.matmul(W, F.reshape(A, [batch_size, 24, 16])),
        [batch_size, -1, 4, 4])
    v_posed_homo = F.concat(
        [v_posed, self.xp.ones([batch_size, v_posed.shape[1], 1], 'f')], 2)
    v_homo = F.matmul(T, F.expand_dims(v_posed_homo, -1))

    verts = v_homo[:, :, :3, 0]

    # Get cocoplus or lsp joints:
    joint_x = F.matmul(verts[:, :, 0], self.joint_regressor)
    joint_y = F.matmul(verts[:, :, 1], self.joint_regressor)
    joint_z = F.matmul(verts[:, :, 2], self.joint_regressor)
    joints = F.stack([joint_x, joint_y, joint_z], axis=2)

    return verts, joints, Rs, A
def _stack_weight(ws):
    """Interleave the given parameters into a single one.

    The inputs are stacked on a new axis 1 and the first two axes are then
    merged, so rows of the inputs alternate in the result.

    Args:
        ws: Sequence of equally shaped parameters.

    Returns:
        The merged parameter.
    """
    # TODO(unno): Input of the current LSTM implementation is shuffled
    stacked = functions.stack(ws, axis=1)
    rows, count = stacked.shape[0], stacked.shape[1]
    merged_shape = (rows * count,) + stacked.shape[2:]
    return functions.reshape(stacked, merged_shape)
def _lstm_forward(self, inputs, batch_lengths, initial_state=None):
    """
    Parameters
    ----------
    inputs : ``PackedSequence``, required.
        A batch first ``PackedSequence`` to run the stacked LSTM over.
    initial_state : ``Tuple[torch.Tensor, torch.Tensor]``, optional, (default = None)
        A tuple (state, memory) representing the initial hidden state and memory
        of the LSTM, with shape (num_layers, batch_size, 2 * hidden_size) and
        (num_layers, batch_size, 2 * cell_size) respectively.

    Returns
    -------
    output_sequence : ``torch.FloatTensor``
        The encoded sequence of shape (num_layers, batch_size, sequence_length, hidden_size)
    final_states: ``Tuple[torch.FloatTensor, torch.FloatTensor]``
        The per-layer final (state, memory) states of the LSTM, with shape
        (num_layers, batch_size, 2 * hidden_size) and  (num_layers, batch_size, 2 * cell_size)
        respectively. The last dimension is duplicated because it contains the state/memory
        for both the forward and backward layers.
    """
    # NOTE(review): the docstring above names torch types, but this body
    # only calls ``F.split_axis`` / ``F.pad_sequence`` / ``F.concat`` /
    # ``F.stack`` -- the type names look inherited from another
    # implementation; confirm and update.

    # One initial state per layer: ``None`` when nothing was supplied,
    # otherwise the (state, memory) pair split along the layer axis.
    if initial_state is None:
        hidden_states = [None] * len(self.forward_layers)
    elif initial_state[0].shape[0] != len(self.forward_layers):
        raise ConfigurationError("Initial states were passed to forward() but the number of "
                                 "initial states does not match the number of layers.")
    else:
        hidden_states = list(zip(F.split_axis(initial_state[0], initial_state[0].shape[0], 0),
                                 F.split_axis(initial_state[1], initial_state[1].shape[0], 0)))

    inputs = F.pad_sequence(inputs)
    # Both directions start from the same padded input.
    forward_output_sequence = inputs
    backward_output_sequence = inputs

    final_states = []
    sequence_outputs = []
    for layer_index, state in enumerate(hidden_states):
        forward_layer = getattr(
            self, 'forward_layer_{}'.format(layer_index))
        backward_layer = getattr(
            self, 'backward_layer_{}'.format(layer_index))

        # Keep this layer's inputs for the skip connection below.
        forward_cache = forward_output_sequence
        backward_cache = backward_output_sequence

        if state is not None:
            # The supplied state holds both directions side by side on
            # axis 2; split it into the forward and backward halves.
            forward_hidden_state, backward_hidden_state = F.split_axis(
                state[0], 2, axis=2)
            forward_memory_state, backward_memory_state = F.split_axis(
                state[1], 2, axis=2)

            forward_state = (forward_hidden_state, forward_memory_state)
            backward_state = (backward_hidden_state, backward_memory_state)
        else:
            forward_state = None
            backward_state = None

        forward_output_sequence, forward_state = forward_layer.forward(
            forward_output_sequence, batch_lengths, forward_state)
        backward_output_sequence, backward_state = backward_layer.forward(
            backward_output_sequence, batch_lengths, backward_state)
        # Skip connections, just adding the input to the output.
        if layer_index != 0:
            forward_output_sequence += forward_cache
            backward_output_sequence += backward_cache

        sequence_outputs.append(F.concat([forward_output_sequence,
                                          backward_output_sequence], -1))
        # Append the state tuples in a list, so that we can return
        # the final states for all the layers.
        final_states.append((F.concat([forward_state[0], backward_state[0]], -1),
                             F.concat([forward_state[1], backward_state[1]], -1)))

    stacked_sequence_outputs = F.stack(sequence_outputs, axis=0)
    # Stack the hidden state and memory for each layer into 2 tensors of shape
    # (num_layers, batch_size, hidden_size) and (num_layers, batch_size, cell_size)
    # respectively.
    final_hidden_states, final_memory_states = zip(*final_states)
    final_state_tuple = (F.concat(final_hidden_states, 0),
                         F.concat(final_memory_states, 0))
    return stacked_sequence_outputs, final_state_tuple