def __call__(self, y, hiddens=None, scale=True):
    """Sum the softmax-entropy term of ``y`` and of every hidden activation.

    For each variable ``v`` the term is
    ``-sum(softmax(v) * log_softmax(v)) / batch_size``; when ``scale`` is
    true it is additionally divided by the product of the non-batch
    dimensions of ``v``.

    Args:
        y: Output variable (first axis is the batch axis).
        hiddens: Optional iterable of hidden variables handled like ``y``.
        scale: Whether to divide each term by the per-sample element count.

    Returns:
        The accumulated loss (hidden terms first, then the output term).
    """
    def _term(v):
        # Same operation order as the original: softmax, log_softmax,
        # product, sum, negate, divide by batch size.
        probs = F.softmax(v)
        log_probs = F.log_softmax(v)
        batch_size = v.data.shape[0]
        value = - F.sum(probs * log_probs) / batch_size
        if scale:
            value = value / np.prod(v.data.shape[1:])
        return value

    total = 0
    if hiddens is not None:
        for hidden in hiddens:
            total += _term(hidden)
    total += _term(y)
    return total
def __call__(self, flame, t, previous_h=None, train=False): """ return one-flame loss and h :param x: Movie (Numpy or Cupy Array) :param t: Vector :return: """ print np.shape(flame),np.shape(t) t = chainer.Variable(t) feature = self.feature(np.array([flame],dtype=np.float32)) h = self.context_lstm(feature) # default previous_h is Zero if previous_h is None: previous_h = chainer.Variable(np.zeros((1, class_num), dtype=np.float32)) h = F.concat((h, previous_h)) h = F.dropout(h, ratio=0.5, train=train) h = self.output_lstm_1(h) h = self.output_lstm_2(h) h = self.l_3(h) if train: # loss and previous h return F.softmax_cross_entropy(h, t), F.softmax(h) else: return F.softmax(h)
def dirichlet_likelihood(weights, alpha=None):
    """ Calculate the Dirichlet prior loss of the topic proportions.

    The value returned is the negated sum of
    ``(alpha - 1) * log(softmax(weights))``, i.e. a loss: a lower value
    means the proportions are more likely under the prior.

    Args:
        weights (chainer.Variable or link): Unnormalized weight matrix, or
            an object exposing such a matrix as ``.W``. The weights are
            passed through a softmax function that maps each row onto a
            probability simplex.
        alpha (float): The Dirichlet concentration parameter. Alpha
            greater than 1.0 results in very dense topic weights such
            that each document belongs to many topics. Alpha < 1.0 results
            in sparser topic weights. The default is to set alpha to
            1.0 / n_topics, effectively enforcing the prior belief that a
            document belongs to very few topics at once.

    Returns:
        ~chainer.Variable: Output loss variable.
    """
    # isinstance (not an exact type comparison) so that Variable subclasses
    # such as chainer.Parameter are used directly instead of being mistaken
    # for a link and dereferenced through a non-existent ``.W``.
    logits = weights if isinstance(weights, Variable) else weights.W
    n_topics = logits.data.shape[1]
    if alpha is None:
        alpha = 1.0 / n_topics
    proportions = F.softmax(logits)
    # The small constant keeps log() finite when a proportion underflows.
    loss = (alpha - 1.0) * F.log(proportions + 1e-8)
    return -F.sum(loss)
def train(self, x_l0, x_l1, y_l, x_u0, x_u1):
    """Update the meta learners once with a supervised and once with a
    consistency loss.

    Step 1 uses the softmax cross entropy of the model on ``(x_l0, y_l)``.
    Step 2 uses ``self.recon_loss`` between the softmax outputs for
    ``x_u0`` and ``x_u1``. ``x_l1`` is accepted but not used here.
    """
    def _apply(loss):
        # Clear grads -> backward (keeping grads) -> cut the graph -> update.
        self.cleargrad_meta_learners()
        loss.backward(retain_grad=True)
        loss.unchain_backward()
        self.update_meta_learners()

    # Supervised (cross entropy) step
    self.forward_meta_learners()
    logits_l = self.model(x_l0, self.model_params)
    _apply(F.softmax_cross_entropy(logits_l, y_l))

    # Semi-supervised (stochastic regularization) step
    self.forward_meta_learners()
    logits_u0 = self.model(x_u0, self.model_params)
    logits_u1 = self.model(x_u1, self.model_params)
    _apply(self.recon_loss(F.softmax(logits_u0), F.softmax(logits_u1)))
def train(self, x_l, y_l, x_u):
    """Run one optimization step on a labeled and an unlabeled batch.

    The total loss is cross entropy on the labeled batch plus, for both
    batches, ``self.ne_loss`` of the encoder output and
    ``self.reconstruction`` of the decoded softmax output.
    """
    def _encode_decode(x):
        # Encoder output, its ne_loss and the reconstruction loss for x.
        # The hiddens are read after both encoder and decoder have run.
        logits = self.ae.encoder(x)
        ne = self.ne_loss(logits)
        recon = self.ae.decoder(F.softmax(logits))
        recon_loss = self.reconstruction(
            recon, x, self.ae.encoder.hiddens, self.ae.decoder.hiddens)
        return logits, ne, recon_loss

    # labeled batch
    logits_l, loss_ne_l, loss_recon_l = _encode_decode(x_l)
    loss_ce = F.softmax_cross_entropy(logits_l, y_l)

    # unlabeled batch
    _, loss_ne_u, loss_recon_u = _encode_decode(x_u)

    # Same summation order as before.
    loss = loss_ce + loss_ne_l + loss_recon_l + loss_ne_u + loss_recon_u

    # Backward and update
    self.ae.cleargrads()
    loss.backward()
    self.optimizer.update()
def __call__(self, x, im_info):
    """Run the region-proposal stage and the detection head on ``x``.

    Stores the class probabilities in ``self.scores`` and the clipped,
    transformed boxes in ``self.pred_boxes``.

    Returns:
        ``None`` when ``self.train`` is true (no loss is computed here),
        otherwise the tuple ``(self.scores, self.pred_boxes)``.
    """
    h, n = self.trunk(x), x.data.shape[0]
    rpn_cls_score = self.rpn_cls_score(h)
    c, hh, ww = rpn_cls_score.data.shape[1:]
    rpn_bbox_pred = self.rpn_bbox_pred(h)
    # Fold the score map into two rows per sample so that the softmax
    # below normalizes over that axis.
    rpn_cls_score = F.reshape(rpn_cls_score, (n, 2, -1))

    # RoI Proposal
    rpn_cls_prob = F.softmax(rpn_cls_score)
    # Restore the original (n, c, hh, ww) layout of the score map.
    rpn_cls_prob_reshape = F.reshape(rpn_cls_prob, (n, c, hh, ww))
    rois = self.proposal_layer(
        rpn_cls_prob_reshape, rpn_bbox_pred, im_info, self.train)
    # Columns 1..4 of each RoI divided by im_info[0][2]
    # (presumably the image scale factor; verify against proposal_layer).
    boxes = rois[:, 1:5] / im_info[0][2]
    rois = chainer.Variable(rois, volatile=not self.train)

    # RCNN
    # Pools from the relu5_3 feature map that the trunk call above stored.
    pool5 = F.roi_pooling_2d(self.trunk.relu5_3_out, rois, 7, 7, 0.0625)
    fc6 = F.relu(self.fc6(pool5))
    fc7 = F.relu(self.fc7(fc6))
    self.scores = F.softmax(self.cls_score(fc7))

    box_deltas = self.bbox_pred(fc7).data
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    self.pred_boxes = clip_boxes(pred_boxes, im_info[0][:2])

    if self.train:
        # loss_cls = F.softmax_cross_entropy(cls_score, labels)
        # huber loss with delta=1 means SmoothL1Loss
        # NOTE(review): the training branch currently returns no loss.
        return None
    else:
        return self.scores, self.pred_boxes
def predict_all(self, seq_batch, test=True, argmax=True):
    """Predict one id (or id array) per time step and return the attention.

    Args:
        seq_batch: Input batch handed to ``self.scan``.
        test: Forwarded to ``scan``, ``forget`` and ``fc``.
        argmax: When true take the arg-max id of every sample; otherwise
            sample a single id from the distribution of the first sample.

    Returns:
        ``(result, forgets)``: per-step ids and per-step softmax-normalized
        forget (attention) variables.
    """
    self.reset_state()
    forward_h, backward_h = self.scan(seq_batch, test=test)
    xp = self.xp

    result = []
    forgets = []
    for fh, bh in zip(forward_h, backward_h):
        # A missing direction is replaced by zeros shaped like the other.
        if fh is None:
            fh = Variable(xp.zeros(bh.data.shape, dtype=xp.float32))
        if bh is None:
            bh = Variable(xp.zeros(fh.data.shape, dtype=xp.float32))

        h = F.concat((fh, bh))
        forget = F.softmax(self.forget(h, test=test))
        out = apply_attention(fh, forget, 0) + apply_attention(bh, forget, 1)
        if self.fc is not None:
            out = self.fc(out, test=test)
        out = F.softmax(out)

        if xp is cuda.cupy:
            out.to_cpu()

        if argmax:
            ids = np.argmax(out.data, axis=1)
        else:
            # Fixed: this branch used to read the undefined name `output`.
            # Renormalize in float64 because np.random.choice rejects
            # probabilities whose float32 sum drifts away from 1.
            probs = out.data[0].astype(np.float64)
            probs /= probs.sum()
            ids = [np.random.choice(np.arange(out.data.shape[1]), p=probs)]
        result.append(ids)
        forgets.append(forget)
    return result, forgets
def _train(self, x0, x1, y=None, prefix=""): loss = 0 # Cross Entropy Loss y_pred0 = self.model(x0) y_pred1 = self.model(x1) if y is not None: loss_ce = F.softmax_cross_entropy(y_pred0, y) loss += loss_ce # Stochastic Regularization loss_rec = self.recon_loss(F.softmax(y_pred0), F.softmax(y_pred1)) loss += loss_rec self.model.cleargrads() loss.backward() self.optimizer.update() # Save gradients if self.iters % self.iter_epoch == 0: if y is not None: self.epoch += 1 self.model.cleargrads() loss_ce.backward() self.save_grad("ce") else: self.model.cleargrads() loss_rec.backward() self.save_grad("sr")
def translate(model, id2wd, jline):
    """Greedily translate the token sequence ``jline``.

    Every source token is embedded with ``model.embedx`` and fed through
    ``model.H``; the hidden states are collected for the attention context.
    After the source EOS token, target ids are generated one at a time
    until the target EOS id appears or the step limit is hit.

    Args:
        model: Model providing jvocab/evocab, embedx/embedy, H, Wc1, Wc2, W.
        id2wd: Mapping from target id to word.
        jline: Iterable of source tokens present in ``model.jvocab``.

    Returns:
        The generated words joined by single spaces. Ids missing from
        ``id2wd`` are rendered as their decimal string.
    """
    max_steps = 30

    def _id_variable(wid):
        # Single-element int32 batch in inference (volatile) mode.
        return Variable(np.array([wid], dtype=np.int32), volatile='on')

    def _predict(h):
        # Attention context over the encoder states, then arg-max word id.
        ct = Variable(attention.mk_ct(gh, h.data[0]), volatile='on')
        h2 = F.tanh(model.Wc1(ct) + model.Wc2(h))
        return np.argmax(F.softmax(model.W(h2)).data[0])

    def _word(wid):
        # Fixed: the fallback used to be the raw integer id, which made
        # ' '.join() raise TypeError for ids missing from id2wd.
        return id2wd[wid] if wid in id2wd else str(wid)

    # Encode the source sentence.
    gh = []
    for w in jline:
        h = model.H(model.embedx(_id_variable(model.jvocab[w])))
        gh.append(h.data[0])

    # The source EOS token triggers the first target word.
    h = model.H(model.embedx(_id_variable(model.jvocab[EOS])))
    wid = _predict(h)
    result_words = [_word(wid)]

    loop = 0
    while (wid != model.evocab[EOS]) and (loop <= max_steps):
        h = model.H(model.embedy(_id_variable(wid)))
        wid = _predict(h)
        result_words.append(_word(wid))
        loop += 1
    return ' '.join(result_words)
def computerAi_Mlp(self, pos_list, gain_list):
    """Choose a move with the MLP model of the current colour.

    The 8x8 board is encoded cell by cell (0 green/empty, 1 black,
    2 white, 3 puttable cell when ``self.puttable_mark`` is set), fed to
    the predictor, and the arg-max output index is mapped to a position;
    index 64 means PASS. Illegal choices are reported on stderr and
    replaced by a fallback.

    Args:
        pos_list: Legal positions as ``(col, row)`` entries.
        gain_list: Not used by this strategy.

    Returns:
        ``[]`` for a pass, otherwise a ``(col, row)`` position.
    """
    # make input(board state)
    print("puttable_mark : " + str(self.puttable_mark))
    board = []
    for i in range(0, 8):
        row = []
        for j in range(0, 8):
            if self.puttable_mark == True and (j, i) in pos_list:
                row.append(3)
                continue
            state = self.getCellState([j, i], (0, 0))
            if state == "green":
                row.append(0)
            elif state == "black":
                row.append(1)
            elif state == "white":
                row.append(2)
            # Any other state adds nothing, exactly as before.
        board.append(row)
    for row in board:
        print(row)
    X = np.array([board], dtype=np.float32)

    # get output
    if self.now_color == "black":
        y = F.softmax(gMlpModelBlack.predictor(X))
    else:
        y = F.softmax(gMlpModelWhite.predictor(X))
    put_pos_flat = y.data.argmax(1)
    print("put_pos_flat = " + str(put_pos_flat))

    # convert pos index
    chosen = put_pos_flat[0]
    if chosen == 64:  # pass
        put_pos = []
    else:
        # Floor division: with "/" the row became a float under Python 3,
        # so no predicted move could ever match an entry of pos_list.
        put_pos = (chosen % 8, chosen // 8)

    # judge illegal move or not
    if len(pos_list) == 0 and len(put_pos) == 0:
        # 'PASS' successful.
        put_pos = []
    elif len(pos_list) == 0 and len(put_pos) != 0:
        sys.stderr.write("Illegal move! : Cannot put stone but AI cannot select 'PASS'.\n")
        put_pos = []
    elif len(pos_list) != 0 and len(put_pos) == 0:
        sys.stderr.write("Illegal move! : Cannot 'PASS' this turn but AI selected it.\n")
        put_pos = pos_list[0]
    elif not(put_pos in pos_list):
        sys.stderr.write("Illegal move! : Cannot put stone at AI selected position.\n")
        put_pos = pos_list[0]
    return put_pos
def _train_meta_learner(self, x_l0, x_l1, y_l, x_u0, x_u1):
    """Back-propagate the consistency loss and hand it to the meta learner.

    The loss is ``self.recon_loss`` between the softmax outputs of the
    model for the two unlabeled inputs ``x_u0`` and ``x_u1``.
    """
    # Stochastic Regularization (i.e., consistency loss)
    logits_a = self.model(x_u0, self.model_params)
    logits_b = self.model(x_u1, self.model_params)
    consistency = self.recon_loss(F.softmax(logits_a), F.softmax(logits_b))

    self.cleargrads()
    consistency.backward()

    # update learner using the consistency loss and the meta-learner
    self.update_parameter_by_meta_learner(
        self.model_params, consistency, x_l0, x_l1, y_l)
def __call__(self, y0, y1):
    """Return the mean of KL(p0 || p1) and KL(p1 || p0).

    ``p0`` and ``p1`` are the softmax of ``y0`` and ``y1``. Each KL sum is
    divided by the batch size and by the product of the non-batch
    dimensions of ``y0``.
    """
    shape = y0.data.shape
    batch_size = shape[0]
    n_elements = np.prod(shape[1:])

    p0, p1 = F.softmax(y0), F.softmax(y1)
    log_p0, log_p1 = F.log_softmax(y0), F.log_softmax(y1)

    def _kl(p, log_p, log_q):
        return F.sum(p * (log_p - log_q)) / batch_size / n_elements

    return (_kl(p0, log_p0, log_p1) + _kl(p1, log_p1, log_p0)) / 2
def forward_for_losses(self, x_l, y_l, x_u, test=False): """ Returns ----------- tuple: tuple of Variables for separate loss """ # Supervision for (x_l, y_l) y = self.mlp_enc(x_l, test) supervised_loss = self.supervised_loss(y, y_l) # Reconstruction for (x_l, _) y = self.mlp_enc(x_l, test) y_prob = F.softmax(y) x_l_recon = self.mlp_dec(y_prob, test) recon_loss_l = self.recon_loss(x_l_recon, x_l, # Use self, x_l self.mlp_enc.hiddens, self.mlp_dec.hiddens, self.scale_rc) # Negative Entropy for y_l if self.lds: #TODO: add mlp_dec.hiddens? neg_ent_l = self.neg_ent_loss(y, self.mlp_enc.hiddens, scale=self.scale_lds) else: neg_ent_l = self.neg_ent_loss(y, scale=self.scale_lds) if x_u is None: return supervised_loss # Reconstruction for (x_u, _) y = self.mlp_enc(x_u, test) y_prob = F.softmax(y) x_u_recon = self.mlp_dec(y_prob, test) recon_loss_u = self.recon_loss(x_u_recon, x_u, # Use self, x_u self.mlp_enc.hiddens, self.mlp_dec.hiddens, self.scale_rc) # Negative Entropy for y_u if self.lds: #TODO: add mlp_dec.hiddens? neg_ent_u = self.neg_ent_loss(y, self.mlp_enc.hiddens, scale=self.scale_lds) else: neg_ent_u = self.neg_ent_loss(y, scale=self.scale_lds) return supervised_loss, recon_loss_u, neg_ent_u
def forward(self, doc, wrd, window=5):
    """Return the word loss and the Dirichlet term for a batch.

    The context is the softmax of the document proportions multiplied by
    the topic factors. ``window`` is accepted but not used here.

    Returns:
        ``(loss, ld)``: ``self.loss_func(context, wrd)`` and
        ``dirichlet_likelihood`` of the proportion weights.
    """
    doc, wrd = utils.move(self.xp, doc, wrd)
    doc_weights = self.proportions(doc)
    ld = dirichlet_likelihood(self.proportions.W)
    topic_mix = F.softmax(doc_weights)
    context = F.matmul(topic_mix, self.factors())
    return self.loss_func(context, wrd), ld
def __call__(self, x, t=None):
    """Forward pass of the convolutional classifier.

    With a target ``t`` the softmax cross entropy and accuracy are stored
    in ``self.loss`` / ``self.accuracy`` and the loss is returned. Without
    it the softmax of the logits is stored in ``self.pred`` and returned.
    The raw logits are always kept in ``self.y``.
    """
    train = self.train

    def _unit(layer, bn, v):
        # layer -> batch normalization -> ReLU
        return F.relu(bn(layer(v), test=not train))

    def _drop(v, ratio):
        return F.dropout(v, ratio=ratio, train=train)

    h = _unit(self.conv1_1, self.bn1_1, x)
    h = _drop(h, 0.3)
    h = _unit(self.conv1_2, self.bn1_2, h)
    h = F.max_pooling_2d(h, 3, stride=3)

    h = _unit(self.conv2_1, self.bn2_1, h)
    h = _drop(h, 0.4)
    h = _unit(self.conv2_2, self.bn2_2, h)
    h = F.max_pooling_2d(h, 3, stride=3)

    h = _unit(self.conv3_1, self.bn3_1, h)
    h = _drop(h, 0.4)
    h = _unit(self.conv3_2, self.bn3_2, h)
    h = _drop(h, 0.4)
    h = F.max_pooling_2d(h, 3, stride=3)

    h = _drop(h, 0.5)
    h = _unit(self.fc4, self.bn4, h)
    h = _drop(h, 0.5)
    h = _unit(self.fc5, self.bn5, h)
    h = _drop(h, 0.5)
    h = self.fc6(h)

    self.y = h
    if t is None:
        self.pred = F.softmax(self.y)
        return self.pred

    self.loss = F.softmax_cross_entropy(h, t)
    self.accuracy = F.accuracy(h, t)
    return self.loss
def __call__(self, x, rois, train=False):
    """Forward pass: convolution stack, RoI pooling, two heads.

    Returns:
        ``(cls_score, bbox_pred)``: softmax class scores and the output of
        the ``bbox_pred`` layer for every RoI.
    """
    # The first four stages are each followed by 2x2 max pooling.
    pooled_stages = (
        (self.conv1_1, self.conv1_2),
        (self.conv2_1, self.conv2_2),
        (self.conv3_1, self.conv3_2, self.conv3_3),
        (self.conv4_1, self.conv4_2, self.conv4_3),
    )
    h = x
    for stage in pooled_stages:
        for conv in stage:
            h = F.relu(conv(h))
        h = F.max_pooling_2d(h, 2, stride=2)

    # The fifth stage feeds RoI pooling instead of max pooling.
    for conv in (self.conv5_1, self.conv5_2, self.conv5_3):
        h = F.relu(conv(h))
    h = roi_pooling_2d(h, rois, 7, 7, 0.0625)

    for fc in (self.fc6, self.fc7):
        h = F.dropout(F.relu(fc(h)), train=train, ratio=0.5)

    cls_score = F.softmax(self.cls_score(h))
    bbox_pred = self.bbox_pred(h)
    return cls_score, bbox_pred
def _log_prob_words(self, context, temperature=1.0):
    """ Return the log of a softmax over the vocabulary.

    The logits are the dot products of ``context`` with every row of
    ``self.vocab.W``, divided by ``temperature``.

    The result is computed with ``F.log_softmax`` rather than
    ``F.log(F.softmax(...))``: the two-step form underflows to ``-inf``
    for words with very small probability.
    """
    dot = F.matmul(context, F.transpose(self.vocab.W))
    return F.log_softmax(dot / temperature)
def predict(self, test_x: np.ndarray):
    """Return the arg-max class index for every row of ``test_x``.

    The hidden activations are kept in ``self.h1`` and ``self.h2``.
    """
    # NOTE(review): F.dropout is called with its defaults here; whether
    # dropout is active at prediction time depends on the Chainer version
    # and configuration -- confirm.
    inputs = Variable(test_x)
    self.h1 = F.dropout(F.relu(self.l1(inputs)))
    self.h2 = F.dropout(F.relu(self.l2(self.h1)))
    probabilities = F.softmax(self.l3(self.h2)).data
    return [np.argmax(row) for row in probabilities]
def predict(self, x_data, y_data, gpu=-1):
    """Return the softmax of the forward pass on ``x_data``.

    Both arrays are moved to the GPU when ``gpu >= 0``. ``y_data`` is
    wrapped in a Variable but does not influence the result.
    """
    use_gpu = gpu >= 0
    if use_gpu:
        x_data = cuda.to_gpu(x_data)
        y_data = cuda.to_gpu(y_data)
    x = Variable(x_data)
    t = Variable(y_data)  # only needed by the disabled accuracy output
    return F.softmax(self.__forward(x))  # , F.accuracy(y, t)
def encode_x_yz(self, x, apply_softmax_y=True):
    """Encode ``x`` and return the outputs of the y head and the z head.

    The y output is passed through a softmax unless ``apply_softmax_y``
    is false.
    """
    encoder = self.encoder
    hidden = encoder(x)
    y_out = encoder.head_y(hidden)
    z_out = encoder.head_z(hidden)
    if not apply_softmax_y:
        return y_out, z_out
    return functions.softmax(y_out), z_out
def predict(self, data, target=None):
    """Predict on ``data`` with ``train=False``.

    Returns:
        Without ``target``: the arg-max class index per sample.
        With ``target``: ``(pred, loss, metric)``, where loss and metric
        come from ``self.loss(pred, target)``.
    """
    pred = self.forward(data, train=False)
    if target is not None:
        loss, metric = self.loss(pred, target)
        return pred, loss, metric
    probabilities = F.softmax(pred).data
    return np.argmax(probabilities, axis=1)
def __call__(self, x, t):
    """Forward pass that stores prediction, loss and accuracy.

    ``self.pred`` (softmax output), ``self.loss`` (softmax cross entropy)
    and ``self.accuracy`` are always set. The loss is returned when
    ``self.train`` is true, the prediction otherwise.
    """
    train = self.train

    def _conv_unit(conv, bn, v):
        # convolution -> batch normalization -> ReLU
        return F.relu(bn(conv(v), test=not train))

    def _pool_drop(v):
        v = F.max_pooling_2d(v, 2, 2)
        return F.dropout(v, ratio=0.25, train=train)

    h = _conv_unit(self.conv1_1, self.bn1_1, x)
    h = _conv_unit(self.conv1_2, self.bn1_2, h)
    h = _pool_drop(h)

    h = _conv_unit(self.conv2_1, self.bn2_1, h)
    h = _conv_unit(self.conv2_2, self.bn2_2, h)
    h = _pool_drop(h)

    h = _conv_unit(self.conv3_1, self.bn3_1, h)
    h = _conv_unit(self.conv3_2, self.bn3_2, h)
    h = _conv_unit(self.conv3_3, self.bn3_3, h)
    h = _conv_unit(self.conv3_4, self.bn3_4, h)
    h = _pool_drop(h)

    h = F.dropout(F.relu(self.fc4(h)), ratio=0.5, train=train)
    h = F.dropout(F.relu(self.fc5(h)), ratio=0.5, train=train)
    h = self.fc6(h)

    self.pred = F.softmax(h)
    self.loss = F.softmax_cross_entropy(h, t)
    self.accuracy = F.accuracy(self.pred, t)

    return self.loss if self.train else self.pred
def forward_one(x, target, hidden, prev_c, train_flag):
    """Return the softmax output distribution for one position of ``x``.

    ``x`` is padded on both sides, the embeddings of a window of
    characters around ``target`` are concatenated with ``hidden``, three
    sigmoid gates are computed from that, and one ``F.lstm`` step feeds
    the output layer. Reads the module-level ``window``, ``char2id``,
    ``model`` and ``dropout_rate``. Only the distribution is returned; the
    updated ``hidden`` and ``prev_c`` stay local.
    """
    # make input window vector
    distance = window // 2
    char_vecs = list()
    x = list(x)
    # Pad with `distance` boundary markers on each side.
    for i in range(distance):
        x.append('</s>')
        x.insert(0,'<s>')
    for i in range(-distance+1 , distance + 2):
        char = x[target + i]
        char_id = char2id[char]
        char_vec = model.embed(get_onehot(char_id))
        char_vecs.append(char_vec)
    concat = F.concat(tuple(char_vecs))
    # NOTE(review): dropout_concat is never used afterwards; the line
    # below concatenates the un-dropped `concat`, so dropout has no effect
    # on the result -- confirm whether that is intended.
    dropout_concat = F.dropout(concat, ratio=dropout_rate, train=train_flag)
    concat = F.concat((concat, hidden))
    i_gate = F.sigmoid(model.i_gate(concat))
    f_gate = F.sigmoid(model.f_gate(concat))
    o_gate = F.sigmoid(model.o_gate(concat))
    # F.lstm receives one tensor holding the hidden state and the gates.
    concat = F.concat((hidden, i_gate, f_gate, o_gate))
    prev_c, hidden = F.lstm(prev_c, concat)
    output = model.output(hidden)
    dist = F.softmax(output)
    #print(dist.data, label, np.argmax(dist.data))
    #correct = get_onehot(label)
    #print(output.data, correct.data)
    return dist
def forward(self, x, q, is_linear=False):
    """Answer query ``q`` against the memory rows ``x``.

    Args:
        x: 2-D array of memory rows; at most the last ``max_knowledge``
            rows (the first dimension of ``self.temporal_a``) are kept.
        q: Query array, reshaped to a single row.
        is_linear: Skip the softmax over the attention scores when true.

    Returns:
        ``self.W(U)`` for the final internal state ``U``.
    """
    # Bound unconditionally: it used to be assigned only in the training
    # branch, so `self.pe` at inference time raised NameError.
    xp = chainer.cuda.get_array_module(x)

    # Random noise for learning time invariance: every row is preceded by
    # an inserted zero row with probability 0.1.
    if chainer.configuration.config.train:
        z = xp.zeros((1, x.shape[1]), dtype=numpy.float32)
        i = 0
        while i < x.shape[0]:
            if numpy.random.rand(1)[0] < 0.1:
                x = xp.vstack((x[:i], z, x[i:]))
                i += 1  # skip the row that was just inserted
            i += 1

    max_knowledge, D = self.temporal_a.shape
    if len(x) > max_knowledge:
        x = x[len(x) - max_knowledge:]
    # Offset that aligns x with the tail of the temporal encodings.
    j = max_knowledge - len(x)

    if self.pe:
        # Float literals keep the step fractional under Python 2 as well.
        a = xp.arange(1, 0, -1.0 / D)
        b = xp.arange(-1, 1, 2.0 / D)
        M = a * F.matmul(x[:, :self.V], self.embedid_a) + b * F.matmul(x[:, self.V:], self.embedid_a) + self.temporal_a[j:]
        C = a * F.matmul(x[:, :self.V], self.embedid_c) + b * F.matmul(x[:, self.V:], self.embedid_c) + self.temporal_c[j:]
    else:
        M = F.matmul(x[:, :self.V], self.embedid_a) + self.temporal_a[j:]
        C = F.matmul(x[:, :self.V], self.embedid_c) + self.temporal_c[j:]

    U = F.matmul(q.reshape(1, -1), self.embedid_b)
    last_layer = self.layer - 1
    for l in range(self.layer):
        P = F.transpose(F.matmul(M, U[0]))
        if not is_linear:
            P = F.softmax(P)
        O = F.matmul(P, C)
        # Only the non-final layers pass U through self.H.
        if l == last_layer:
            U = U + O
        else:
            U = self.H(U) + O
    return self.W(U)  # (1,D)
def __call__(self, x, train=True):
    """Forward pass: three BN-conv-ReLU layers, five densely connected
    dilated branches and a final BN-conv with a softmax over axis 1.

    ``train`` is accepted but not used in this method.
    """
    def _bn_conv_relu(bn, conv, v):
        return F.relu(conv(bn(v)))

    def _branch(bn_in, conv_1x1, bn_mid, dilated, v):
        # BN -> 1x1 conv -> BN -> dilated conv -> ReLU
        return F.relu(dilated(bn_mid(conv_1x1(bn_in(v)))))

    # First convolutions
    c0 = _bn_conv_relu(self.bnF1, self.convF1, x)
    c0 = _bn_conv_relu(self.bnF2, self.convF2, c0)
    c0 = _bn_conv_relu(self.bnF3, self.convF3, c0)

    # Atrous convolution size 3
    # NOTE(review): this branch applies self.dilate_conv6, the same layer
    # as the size-6 branch below -- confirm this is intended.
    cn3 = _branch(self.bn1x1_D3, self.conv1x1_D3, self.bnD3,
                  self.dilate_conv6, c0)

    # Every later branch sees the concatenation of all earlier features.
    features = F.concat((c0, cn3), axis=1)
    later_branches = (
        (self.bn1x1_D6, self.conv1x1_D6, self.bnD6, self.dilate_conv6),
        (self.bn1x1_D12, self.conv1x1_D12, self.bnD12, self.dilate_conv12),
        (self.bn1x1_D18, self.conv1x1_D18, self.bnD18, self.dilate_conv18),
        (self.bn1x1_D24, self.conv1x1_D24, self.bnD24, self.dilate_conv24),
    )
    for bn_in, conv_1x1, bn_mid, dilated in later_branches:
        branch_out = _branch(bn_in, conv_1x1, bn_mid, dilated, features)
        features = F.concat((features, branch_out), axis=1)

    # Last convolution
    cL = self.convL(self.bnL(features))
    return F.softmax(cL, axis=1)
def forward_single(x_data, _size, train=False):
    """Return the resized channel-1 softmax map for the first image.

    The first item of ``x_data`` is divided by 255, run through the
    module-level ``model`` and the softmax output ``[0][1]`` is resized to
    ``_size`` as float32. ``train`` is accepted but not used.
    """
    # Same transpose / divide / transpose round trip as before.
    image = x_data[0].transpose([1, 2, 0]) / 255.0
    image = image.transpose([2, 0, 1])
    channels, height, width = image.shape
    feat = Variable(image.reshape([1, channels, height, width]))

    stages = (
        (model.conv1, model.norm1, F.max_pooling_2d),
        (model.conv2, model.norm2, F.max_pooling_2d),
        (model.conv3, model.norm3, F.average_pooling_2d),
    )
    for conv, norm, pool in stages:
        feat = pool(F.relu(norm(conv(feat))), 3, stride=2)

    probabilities = F.softmax(model.conv4(feat)).data
    # positive region
    return resize(probabilities[0][1], _size).astype(np.float32)
def test(self, x_l, y_l):
    """Evaluate on a labeled batch, dump diagnostics and save the model.

    Prints the confusion matrix, saves information about the misclassified
    samples via ``save_incorrect_info`` and writes ``self.model`` to
    "./model/mlp_encdec.h5py".

    Returns:
        ``(acc, supervised_loss)``.
    """
    probs = F.softmax(self.mlp_enc(x_l, test=True))
    predicted = F.argmax(probs, axis=1)
    acc = F.accuracy(probs, y_l)

    labels_cpu = cuda.to_cpu(y_l.data)
    predicted_cpu = cuda.to_cpu(predicted.data)

    # Confusion matrix
    print(confusion_matrix(labels_cpu, predicted_cpu))

    # Indices of the wrongly classified samples
    wrong = np.where(labels_cpu != predicted_cpu)[0]

    # Generate reconstructions and save the incorrect ones
    reconstruction = self.mlp_dec(probs, test=True)
    save_incorrect_info(reconstruction.data[wrong, ], x_l.data[wrong, ],
                        probs.data[wrong, ], y_l.data[wrong, ])

    # Save model
    serializers.save_hdf5("./model/mlp_encdec.h5py", self.model)

    # only measure x_l
    supervised_loss = self.forward_for_losses(x_l, y_l, None, test=True)
    return acc, supervised_loss
def predict(x):
    """
    Return the softmax of the 'ip2' output of ``caffe_func`` for ``x``.

    :param x: numpy.array fed to the network as its 'data' input
    """
    outputs = caffe_func(inputs={'data': x}, outputs=['ip2'], train=False)
    # Exactly one output is expected; the unpacking fails otherwise.
    (ip2,) = outputs
    return F.softmax(ip2)
def check_forward(self, x_data, t_data, use_cudnn=True):
    """Check the forward result of ``cross_entropy`` against a reference.

    Asserts that the loss is a float32 scalar, that the creator has a
    ``y`` attribute exactly when ``self.cache_score`` is set, and that the
    value matches the mean negative log-softmax of ``self.x`` at the
    targets ``self.t`` (targets equal to -1 are skipped).
    """
    probs = functions.softmax(chainer.Variable(x_data))
    targets = chainer.Variable(t_data)
    loss = cross_entropy(
        probs, targets, use_cudnn=use_cudnn, cache_score=self.cache_score)
    self.assertEqual(loss.data.shape, ())
    self.assertEqual(loss.data.dtype, numpy.float32)
    self.assertEqual(hasattr(loss.creator, 'y'), self.cache_score)
    loss_value = float(cuda.to_cpu(loss.data))

    # Compute expected value
    n_classes = self.x.shape[1]
    rows = numpy.rollaxis(self.x, 1, self.x.ndim).reshape(
        (self.t.size, n_classes))
    labels = self.t.ravel()

    loss_expect = 0.0
    count = 0
    for row, label in six.moves.zip(rows, labels):
        if label == -1:
            continue
        log_z = numpy.logaddexp.reduce(row)
        loss_expect -= (row - log_z)[label]
        count += 1

    # With no valid target the expectation stays 0.0.
    if count != 0:
        loss_expect /= count

    self.assertAlmostEqual(loss_expect, loss_value, places=5)
def compute(s):
    """Append the channel-1 softmax map of the image rescaled by ``s``.

    Reads ``x_data``, ``model`` and ``_size`` from the enclosing scope.
    The map is resized to ``_size`` as float32 and appended to
    ``global_output``.
    """
    image = x_data[0].transpose([1, 2, 0]) / 255.0
    image = rescale(image, s).astype(np.float32)
    image = image.transpose([2, 0, 1])
    channels, height, width = image.shape
    feat = Variable(image.reshape([1, channels, height, width]))

    stages = (
        (model.conv1, model.norm1, F.max_pooling_2d),
        (model.conv2, model.norm2, F.max_pooling_2d),
        (model.conv3, model.norm3, F.average_pooling_2d),
    )
    for conv, norm, pool in stages:
        feat = pool(F.relu(norm(conv(feat))), 3, stride=2)

    probabilities = F.softmax(model.conv4(feat)).data
    # positive region
    positive_map = resize(probabilities[0][1], _size).astype(np.float32)
    global_output.append(positive_map)
def __call__(self, x, t=None):
    """Forward pass with separate single-arm and dual-arm losses.

    The ``fc8`` output is reshaped to ``(-1, 2, n_class)``; the softmax
    over axis 1 at index 1 is stored in ``self.h_prob``. Samples whose
    target at column ``n_class // 2`` equals -1 are treated as single-arm
    and use the first half of the classes; the others use the second half.

    Args:
        x: Input batch.
        t: Target array, or ``None`` for prediction only (allowed only
            when ``chainer.config.train`` is false).

    Returns:
        ``self.loss`` when ``chainer.config.train`` is true, else ``None``.
        ``self.loss`` and ``self.acc`` are set and reported when ``t`` is
        given.
    """
    # Fixed: len(t) used to be evaluated even for t=None, which made the
    # prediction-only path fail before the network ran.
    if t is not None:
        assert len(x) == len(t)

    conv_stages = (
        ((self.conv1_1, self.bn1_1), (self.conv1_2, self.bn1_2)),
        ((self.conv2_1, self.bn2_1), (self.conv2_2, self.bn2_2)),
        ((self.conv3_1, self.bn3_1), (self.conv3_2, self.bn3_2),
         (self.conv3_3, self.bn3_3)),
        ((self.conv4_1, self.bn4_1), (self.conv4_2, self.bn4_2),
         (self.conv4_3, self.bn4_3)),
        ((self.conv5_1, self.bn5_1), (self.conv5_2, self.bn5_2),
         (self.conv5_3, self.bn5_3)),
    )
    h = x
    for stage in conv_stages:
        for conv, bn in stage:
            h = F.relu(bn(conv(h)))
        h = F.max_pooling_2d(h, 2, stride=2)

    # Cut the graph here so the convolution layers get no gradient.
    if not self.train_conv:
        h.unchain_backward()

    h = F.dropout(F.relu(self.fc6(h)), ratio=0.5)
    h = F.dropout(F.relu(self.fc7(h)), ratio=0.5)
    h = self.fc8(h)

    h = h.reshape((-1, 2, self.n_class))
    h_prob = F.softmax(h, axis=1)[:, 1, :]
    self.h_prob = h_prob

    if t is None:
        assert not chainer.config.train
        return

    # Fixed: floor division keeps this an int (a float slice index raises
    # TypeError under Python 3).
    half_n = self.n_class // 2
    is_singlearm_mask = t[:, half_n] == -1

    # loss for single arm
    h_single = h[is_singlearm_mask][:, :, :half_n]
    t_single = t[is_singlearm_mask][:, :half_n]
    # Requires: https://github.com/chainer/chainer/pull/3310
    if h_single.data.shape[0] > 0:
        loss_single = F.softmax_cross_entropy(
            h_single, t_single, normalize=False)
    else:
        loss_single = None

    # loss for dual arm
    h_dual = h[~is_singlearm_mask][:, :, half_n:]
    t_dual = t[~is_singlearm_mask][:, half_n:]
    # Requires: https://github.com/chainer/chainer/pull/3310
    if h_dual.data.shape[0] > 0:
        loss_dual = F.softmax_cross_entropy(
            h_dual, t_dual, normalize=False)
    else:
        loss_dual = None

    if loss_single is None:
        self.loss = loss_dual
    elif loss_dual is None:
        self.loss = loss_single
    else:
        self.loss = loss_single + loss_dual

    # calculate acc on CPU
    h_prob_single = h_prob[is_singlearm_mask][:, :half_n]
    h_prob_single = chainer.cuda.to_cpu(h_prob_single.data)
    t_single = chainer.cuda.to_cpu(t_single)
    h_prob_dual = h_prob[~is_singlearm_mask][:, half_n:]
    h_prob_dual = chainer.cuda.to_cpu(h_prob_dual.data)
    t_dual = chainer.cuda.to_cpu(t_dual)

    label_single = (h_prob_single > self.threshold).astype(self.xp.int32)
    label_dual = (h_prob_dual > self.threshold).astype(self.xp.int32)
    # A sample counts as correct only if all of its labels match.
    acc_single = (t_single == label_single).all(axis=1)
    acc_single = acc_single.astype(self.xp.int32).flatten()
    acc_dual = (t_dual == label_dual).all(axis=1)
    acc_dual = acc_dual.astype(self.xp.int32).flatten()

    self.acc = self.xp.sum(acc_single) + self.xp.sum(acc_dual)
    self.acc = self.acc / float(len(acc_single) + len(acc_dual))

    chainer.reporter.report({
        'loss': self.loss,
        'acc': self.acc,
    }, self)

    if chainer.config.train:
        return self.loss
def __call__(self, x):
    """Apply L1 and L2, each followed by a softmax, then a sigmoid on L3."""
    hidden = x
    for layer in (self.L1, self.L2):
        hidden = F.softmax(layer(hidden))
    return F.sigmoid(self.L3(hidden))
def translate_beam_search(model, source, max_predict_length, vocab_size,
                          beam_width=8, normalization_alpha=0,
                          source_reversed=True, return_all_candidates=False):
    """Translate ``source`` with beam search.

    Args:
        model: Seq2seq model providing ``xp``, ``encode``, ``decode``,
            ``reset_state`` and ``reset_decoder_state``.
        source: Token id array; a 1-D array is reshaped to one row.
        max_predict_length: Maximum number of decoding steps.
        vocab_size: Size of the output vocabulary; used to split a flat
            score index into (beam, token).
        beam_width: Number of hypotheses kept alive.
        normalization_alpha: Exponent of the length penalty
            ``((5 + length) / 6) ** alpha`` applied to finished hypotheses.
        source_reversed: Not used inside this function.
        return_all_candidates: Return every finished hypothesis instead of
            only the best one.

    Returns:
        A list of token ids (the best hypothesis, or the first live beam
        when none finished), or a list of such lists when
        ``return_all_candidates`` is true.
    """
    xp = model.xp
    if source.ndim == 1:
        source = xp.reshape(source, (1, -1))
    skip_mask = source != ID_PAD
    # NOTE(review): batchsize and word_ids below are never read.
    batchsize = source.shape[0]

    # to gpu
    if xp is cuda.cupy:
        source = cuda.to_gpu(source)
        skip_mask = cuda.to_gpu(skip_mask)

    word_ids = xp.arange(0, vocab_size, dtype=xp.int32)

    model.reset_state()
    # Every beam starts with the <go> token.
    x = xp.full((beam_width, 1), ID_GO, dtype=xp.int32)

    # get encoder's last hidden states
    if isinstance(model, AttentiveSeq2SeqModel):
        encoder_last_hidden_states, encoder_last_layer_outputs = model.encode(
            source, skip_mask)
    else:
        encoder_last_hidden_states, encoder_last_layer_outputs = model.encode(
            source, skip_mask), None

    # copy beam_width times
    for i, state in enumerate(encoder_last_hidden_states):
        encoder_last_hidden_states[i] = xp.repeat(state.data, beam_width, axis=0)
    if encoder_last_layer_outputs is not None:
        encoder_last_layer_outputs = xp.repeat(encoder_last_layer_outputs.data, beam_width, axis=0)

    # Accumulated log probability of every live beam.
    sum_log_p = xp.zeros((beam_width, 1), dtype=xp.float32)
    skip_mask = xp.repeat(skip_mask, beam_width, axis=0)

    def argmax_k(array, k):
        # Indices of the k largest entries, largest first.
        if xp is np:
            return array.argsort()[-k:][::-1]
        else:
            # NOTE(review): this branch overwrites entries of `array` in
            # place; at t == 0 `array` is log_p_t[0], which looks like a
            # view, so the later log_p_t[backward, token] lookup may read
            # the overwritten minimum -- confirm.
            result = []
            min_value = xp.amin(array)
            for n in range(k):
                result.append(xp.argmax(array))
                array[result[-1]] = min_value
            return result

    current_beam_width = beam_width
    candidates = []
    log_likelihood = []

    for t in range(max_predict_length):
        model.reset_decoder_state()
        if isinstance(model, AttentiveSeq2SeqModel):
            u_t = model.decode(x, encoder_last_hidden_states, encoder_last_layer_outputs, skip_mask, return_last=True)
        else:
            u_t = model.decode(x, encoder_last_hidden_states, return_last=True)
        p_t = F.softmax(u_t)  # convert to probability
        log_p_t = F.log(p_t).data

        # compute scores
        if t == 0:
            # All beams are identical at the first step, so only the
            # first row is ranked.
            score = log_p_t[0]  # <go>
            top_indices = argmax_k(score, current_beam_width)
        else:
            score = log_p_t + xp.repeat(sum_log_p, vocab_size, axis=1)
            score = score.reshape((-1, ))
            top_indices = argmax_k(score, current_beam_width)

        backward_table = [-1] * current_beam_width
        token_table = [-1] * current_beam_width

        stopped_beams = []
        for beam, index in enumerate(top_indices):
            index = int(index)
            # A flat index encodes (parent beam, token).
            token = index % vocab_size
            backward = index // vocab_size
            backward_table[beam] = backward
            token_table[beam] = token
            # NOTE(review): the step log-probability is added to
            # sum_log_p[beam], but the loop further below copies
            # sum_log_p[backward] into the new beam; these agree only when
            # beam == backward -- confirm the bookkeeping.
            sum_log_p[beam] += log_p_t[backward, token]
            if token == ID_EOS:
                stopped_beams.append(beam)
                log_likelihood.append(float(sum_log_p[beam]))

        # concatenate
        # Add one padding column that receives the tokens chosen above.
        if xp is np:
            x = xp.append(x, xp.full((current_beam_width, 1), ID_PAD, dtype=xp.int32), axis=1)
        else:
            x = xp.concatenate(
                (x, xp.full((current_beam_width, 1), ID_PAD, dtype=xp.int32)), axis=1)
        new_x = xp.copy(x)

        # reconstruct input sequence
        new_sum_log_p = xp.empty_like(sum_log_p)
        for beam in range(current_beam_width):
            new_x[beam, -1] = token_table[beam]
            backward = backward_table[beam]
            new_x[beam, :-1] = x[backward, :-1]
            new_sum_log_p[beam] = sum_log_p[backward]
        x = new_x
        sum_log_p = new_sum_log_p

        # remove stopped beam
        if len(stopped_beams) > 0:
            flag = xp.ones((current_beam_width, ), dtype=bool)
            flag[stopped_beams] = False
            new_x = x[flag]
            sum_log_p = sum_log_p[flag]
            stopped_x = x[xp.invert(flag)]
            for n in range(len(stopped_x)):
                candidates.append(stopped_x[n])
            x = new_x

            # slice
            # The encoder copies are identical per beam, so dropping the
            # first rows is enough to match the reduced beam count.
            # NOTE(review): the nesting of this section was reconstructed
            # from unindented text; it is a no-op when nothing stopped.
            num_to_remove = len(stopped_beams)
            for i, state in enumerate(encoder_last_hidden_states):
                encoder_last_hidden_states[i] = encoder_last_hidden_states[i][
                    num_to_remove:]
            if encoder_last_layer_outputs is not None:
                encoder_last_layer_outputs = encoder_last_layer_outputs[
                    num_to_remove:]
            skip_mask = skip_mask[num_to_remove:]

        current_beam_width -= len(stopped_beams)
        if current_beam_width <= 0:
            break

    assert len(candidates) == len(log_likelihood)
    num_sampled = len(candidates)

    # if empty
    # No beam emitted EOS: fall back to the first live beam.
    if num_sampled == 0:
        result = []
        for token in (x[0]):
            result.append(token)
        return result

    # compute score
    scores = np.empty((num_sampled, ), dtype=float)
    for i, (sequence, log_p) in enumerate(zip(candidates, log_likelihood)):
        length = sequence.size
        penalty = math.pow(5 + length, normalization_alpha) / math.pow(
            5 + 1, normalization_alpha)
        score = log_p / penalty
        scores[i] = score

    if return_all_candidates == True:
        result = [[] for i in range(len(candidates))]
        indices = np.flip(np.argsort(scores), axis=0)
        # NOTE(review): every candidate is written to result[index], so
        # the output stays in candidate order and is not sorted by score,
        # despite iterating over the sorted indices.
        for index in indices:
            for token in (candidates[index]):
                result[index].append(token)
        return result

    best_index = np.argmax(scores)
    result = []
    for token in (candidates[best_index]):
        result.append(token)
    return result
def __call__(self, input_x, t, ignore_t):
    """Compute the six squared-error loss terms for one batch.

    The predictor output is split into x, y, w, h, confidence and class
    probability maps.  Regression targets are built on the host with NumPy
    and then moved to the predictor's device before the losses are summed.

    Args:
        input_x: Batch fed to ``self.predictor`` (a ``chainer.Variable`` or
            a raw array).
        t: Per-image sequences of ground-truth boxes.  Each box is a mapping
            with ``"x"``, ``"y"``, ``"w"``, ``"h"`` and ``"label"`` entries.
            NOTE(review): coordinates are multiplied by the grid size to find
            the responsible cell, so they are presumably normalised to
            [0, 1] -- confirm against the data loader.
        ignore_t: Per-image sequences of boxes (``"x"``, ``"y"``, ``"w"``,
            ``"h"``) marking regions that count as neither positive nor
            negative for the confidence loss.

    Returns:
        tuple: ``(x_loss, y_loss, w_loss, h_loss, c_loss, p_loss)``.
    """
    if isinstance(input_x, chainer.Variable):
        device = cuda.get_device(input_x.data)
    else:
        device = cuda.get_device(input_x)
    xp = self.predictor.xp

    with device:
        output = self.predictor(input_x)
        batch_size, _, grid_h, grid_w = output.shape
        self.seen += batch_size
        x, y, w, h, conf, prob = F.split_axis(
            F.reshape(
                output,
                (batch_size, self.predictor.n_boxes,
                 self.predictor.n_classes + 5, grid_h, grid_w)),
            (1, 2, 3, 4, 5), axis=2)
        x = F.sigmoid(x)
        y = F.sigmoid(y)
        conf = F.sigmoid(conf)
        prob = F.transpose(prob, (0, 2, 1, 3, 4))
        prob = F.softmax(prob)

        # training labels
        tw = np.zeros(w.shape, dtype=np.float32)
        th = np.zeros(h.shape, dtype=np.float32)
        tx = np.tile(0.5, x.shape).astype(np.float32)
        ty = np.tile(0.5, y.shape).astype(np.float32)

        # set low learning rate for bounding boxes that have no object
        if self.seen < self.unstable_seen:
            box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
        else:
            box_learning_scale = np.tile(0, x.shape).astype(np.float32)

        tconf = np.zeros(conf.shape, dtype=np.float32)
        conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

        # Cells without an object keep their own prediction as the class
        # target, so they contribute zero class loss.
        tprob = prob.data.copy()

        x_shift = np.broadcast_to(
            np.arange(grid_w, dtype=np.float32), x.shape[1:])
        y_shift = np.broadcast_to(
            np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1),
            y.shape[1:])
        anchors = np.array(self.anchors, dtype=np.float32)
        anchor_shape = (self.predictor.n_boxes, 1, 1, 1)
        w_anchor = np.broadcast_to(
            np.reshape(anchors[:, 0], anchor_shape), w.shape[1:])
        h_anchor = np.broadcast_to(
            np.reshape(anchors[:, 1], anchor_shape), h.shape[1:])

        # One host copy of every prediction map.  ``cuda.to_cpu`` passes
        # NumPy arrays through unchanged, so this works on CPU and GPU and
        # avoids a device-to-host transfer per scalar further down.
        x_data = cuda.to_cpu(x.data)
        y_data = cuda.to_cpu(y.data)
        w_data = cuda.to_cpu(w.data)
        h_data = cuda.to_cpu(h.data)
        conf_data = cuda.to_cpu(conf.data)

        def best_ious_against(boxes_per_image):
            # For every image, the max IoU of each predicted box over the
            # given reference boxes (all zeros when there are none).
            best = []
            for batch in range(batch_size):
                references = boxes_per_image[batch]
                box_x = (x_data[batch] + x_shift) / grid_w
                box_y = (y_data[batch] + y_shift) / grid_h
                box_w = np.exp(w_data[batch]) * w_anchor / grid_w
                box_h = np.exp(h_data[batch]) * h_anchor / grid_h

                ious = []
                for ref_index in range(len(references)):
                    ref = references[ref_index]
                    ref_x = np.broadcast_to(
                        np.array(ref["x"], dtype=np.float32), box_x.shape)
                    ref_y = np.broadcast_to(
                        np.array(ref["y"], dtype=np.float32), box_y.shape)
                    ref_w = np.broadcast_to(
                        np.array(ref["w"], dtype=np.float32), box_w.shape)
                    ref_h = np.broadcast_to(
                        np.array(ref["h"], dtype=np.float32), box_h.shape)
                    ious.append(
                        multi_box_iou(
                            Box(box_x, box_y, box_w, box_h),
                            Box(ref_x, ref_y, ref_w, ref_h)))
                if len(ious) > 0:
                    best.append(np.max(np.asarray(ious), axis=0))
                else:
                    best.append(np.zeros_like(x_data[0]))
            return np.array(best)

        # keep confidence of anchor that has more confidence than threshold
        overlapping = best_ious_against(t) > self.thresh
        tconf[overlapping] = conf_data[overlapping]
        conf_learning_scale[overlapping] = 0

        # ignored regions are not considered either positive or negative:
        # do not update confidence for them
        ignored = best_ious_against(ignore_t) > self.ignore_thresh
        tconf[ignored] = conf_data[ignored]
        conf_learning_scale[ignored] = 0

        # adjust x, y, w, h, conf, prob of anchor boxes that have objects
        abs_anchors = self.anchors / np.array([grid_w, grid_h])
        for batch in range(batch_size):
            for truth_box in t[batch]:
                truth_w = int(float(truth_box["x"]) * grid_w)
                truth_h = int(float(truth_box["y"]) * grid_h)

                # Pick the anchor whose shape matches the truth box best.
                truth_n = 0
                best_iou = 0.0
                for anchor_index, abs_anchor in enumerate(abs_anchors):
                    iou = box_iou(
                        Box(0, 0, float(truth_box["w"]),
                            float(truth_box["h"])),
                        Box(0, 0, abs_anchor[0], abs_anchor[1]))
                    if best_iou < iou:
                        best_iou = iou
                        truth_n = anchor_index

                box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
                tx[batch, truth_n, :, truth_h, truth_w] = (
                    float(truth_box["x"]) * grid_w - truth_w)
                ty[batch, truth_n, :, truth_h, truth_w] = (
                    float(truth_box["y"]) * grid_h - truth_h)
                tw[batch, truth_n, :, truth_h, truth_w] = np.log(
                    float(truth_box["w"]) / abs_anchors[truth_n][0])
                th[batch, truth_n, :, truth_h, truth_w] = np.log(
                    float(truth_box["h"]) / abs_anchors[truth_n][1])
                tprob[batch, :, truth_n, truth_h, truth_w] = 0
                tprob[batch, int(truth_box["label"]), truth_n,
                      truth_h, truth_w] = 1

                # The confidence target of the responsible anchor is the IoU
                # between its current prediction and the truth box.
                full_truth_box = Box(
                    float(truth_box["x"]), float(truth_box["y"]),
                    float(truth_box["w"]), float(truth_box["h"]))
                cell = (batch, truth_n, 0, truth_h, truth_w)
                predicted_box = Box(
                    (x_data[cell] + truth_w) / grid_w,
                    (y_data[cell] + truth_h) / grid_h,
                    np.exp(w_data[cell]) * abs_anchors[truth_n][0],
                    np.exp(h_data[cell]) * abs_anchors[truth_n][1])
                predicted_iou = box_iou(full_truth_box, predicted_box)
                tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
                conf_learning_scale[batch, truth_n, :,
                                    truth_h, truth_w] = 10.0

        # Move the targets next to the predictions.  On a CPU predictor the
        # arrays must stay NumPy arrays.
        if xp is not np:
            tx = cuda.to_gpu(tx)
            ty = cuda.to_gpu(ty)
            tw = cuda.to_gpu(tw)
            th = cuda.to_gpu(th)
            tconf = cuda.to_gpu(tconf)
            tprob = cuda.to_gpu(tprob)
            box_learning_scale = cuda.to_gpu(box_learning_scale)
            conf_learning_scale = cuda.to_gpu(conf_learning_scale)

        x_loss = F.sum((tx - x)**2 * box_learning_scale) / 2
        y_loss = F.sum((ty - y)**2 * box_learning_scale) / 2
        w_loss = F.sum((tw - w)**2 * box_learning_scale) / 2
        h_loss = F.sum((th - h)**2 * box_learning_scale) / 2
        c_loss = F.sum((tconf - conf)**2 * conf_learning_scale) / 2
        p_loss = F.sum((tprob - prob)**2) / 2
        return x_loss, y_loss, w_loss, h_loss, c_loss, p_loss
self.l1 = L.Linear(4, 10) #入力4,中間層10 self.l2 = L.Linear(10, 10) #中間層10,中間層10 self.l3 = L.Linear(10, 4) #中間層10,出力4 def __call__(self, x): h1 = F.relu(self.l1(x)) h2 = F.relu(self.l2(h1)) y = self.l3(h2) return y epoch = 1000 batchsize = 8 # ニューラルネットワークの登録 model = L.Classifier(MyChain(), lossfun=F.softmax_cross_entropy) chainer.serializers.load_npz("result/out.model", model) # 学習結果の評価 with serial.Serial('COM5') as ser: while True: line = ser.readline() line = line.rstrip().decode('utf-8') data = line.strip().split(",") data = np.array(data, dtype=np.int32) data = data[:4] #次元削減 data = np.array(data, dtype=np.float32) x = chainer.Variable(data.reshape(1, 4)) result = F.softmax(model.predictor(x)) print("input: {}, result: {}".format(data, result.data.argmax()))
def __call__(self, state, x):
    """Advance the look-ahead language model by one subword label.

    Args:
        state: ``None`` on the first call, otherwise the 3-tuple
            ``(wlm_state, cumsum_probs, node)`` returned by the previous
            call.
        x: Input label, converted with ``int(x)``.  It is not read when
            ``state`` is ``None``.

    Returns:
        tuple: ``((wlm_state, cumsum_probs, new_node), log_y)`` where
        ``log_y`` is a ``(1, self.subword_dict_size)`` float array of
        log-probabilities over the next label.
    """
    xp = self.xp
    out_shape = (1, self.subword_dict_size)

    if state is None:
        # First step: build the initial word-LM state and the cumulative
        # probability vector from the <eos> word.
        wlm_state, z_wlm = self.wordlm(None, self.xp_word_eos)
        cumsum_probs = xp.cumsum(F.softmax(z_wlm).data, axis=1)
        new_node = self.lexroot
        xi = self.space
    else:
        wlm_state, cumsum_probs, node = state
        xi = int(x)
        if xi == self.space:
            # Inter-word transition: feed the finished word (or <unk> when
            # the current node is not a word end) to the word LM.
            at_word_end = node is not None and node[1] >= 0
            if at_word_end:
                w = xp.full(1, node[1], "i")
            else:
                w = self.xp_word_unk
            wlm_state, z_wlm = self.wordlm(wlm_state, w)
            cumsum_probs = xp.cumsum(F.softmax(z_wlm).data, axis=1)
            new_node = self.lexroot  # back to the tree root
        elif node is not None and xi in node[0]:
            # Intra-word transition: follow the edge labelled xi.
            new_node = node[0][xi]
        elif self.open_vocab:
            # No path in the tree: enter open-vocabulary mode.
            new_node = None
        else:
            # Open vocabulary disabled: every label gets zero probability.
            log_y = xp.full(out_shape, self.logzero, "f")
            return (wlm_state, None, None), log_y

    if new_node is None:
        # Outside the tree every transition has probability one.
        log_y = xp.zeros(out_shape, "f")
        return (wlm_state, cumsum_probs, new_node), log_y

    succ, wid, wids = new_node

    # Probability mass of the parent node.
    if wids is not None:
        sum_prob = cumsum_probs[:, wids[1]] - cumsum_probs[:, wids[0]]
    else:
        sum_prob = 1.0
    if sum_prob < self.zero:
        log_y = xp.full(out_shape, self.logzero, "f")
        return (wlm_state, cumsum_probs, new_node), log_y

    # Every label starts from the penalised <unk> probability.
    unk_prob = (cumsum_probs[:, self.word_unk]
                - cumsum_probs[:, self.word_unk - 1])
    y = xp.full(out_shape, unk_prob * self.oov_penalty, "f")

    # Transition probabilities to the child nodes.
    for cid, nd in succ.items():
        child_mass = cumsum_probs[:, nd[2][1]] - cumsum_probs[:, nd[2][0]]
        y[:, cid] = child_mass / sum_prob

    # Word-level probabilities for the <space> and <eos> labels.
    if wid >= 0:
        wlm_prob = (cumsum_probs[:, wid] - cumsum_probs[:, wid - 1]) / sum_prob
        y[:, self.space] = wlm_prob
        y[:, self.eos] = wlm_prob
    elif xi == self.space:
        y[:, self.space] = self.zero
        y[:, self.eos] = self.zero

    # Clip before the log to avoid log(0).
    log_y = xp.log(xp.clip(y, self.zero, None))
    return (wlm_state, cumsum_probs, new_node), log_y
def attn_layer(self, ht, Hs):
    """Attend over ``Hs`` with ``ht`` as the query.

    Scores are ``ht`` times ``Hs`` transposed, normalised with a softmax.
    The context is the sum over axis 0 of ``Hs`` scaled by the transposed
    weights, reshaped to ``(1, ht.shape[1])``.

    Args:
        ht: Query.  Presumably a ``(1, dim)`` hidden state -- TODO confirm.
        Hs: Memory.  Presumably ``(steps, dim)`` -- TODO confirm.

    Returns:
        tuple: ``(c_t, a_t)``, the context and the attention weights.
    """
    # NOTE(review): ``M`` is resolved from the enclosing module, which is
    # not visible here.
    attention = F.softmax(F.matmul(ht, Hs, transb=True))
    weighted_rows = F.scale(Hs, F.transpose(attention), axis=0)
    pooled = M.sum(weighted_rows, axis=0)
    context = F.reshape(pooled, (1, ht.shape[1]))
    return context, attention
def predict(self, imgs):
    """Detect objects from images.

    This method predicts objects for each image.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format
            and the range of their value is :math:`[0, 255]`.

    Returns:
       tuple of lists:
       This method returns a tuple of three lists,
       :obj:`(bboxes, labels, scores)`.

       * **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
           where :math:`R` is the number of bounding boxes in a image. \
           Each bounding box is organized by \
           :obj:`(x_min, y_min, x_max, y_max)` \
           in the second axis.
       * **labels** : A list of integer arrays of shape :math:`(R,)`. \
           Each value indicates the class of the bounding box. \
           Values are in range :math:`[0, L - 1]`, where :math:`L` is the \
           number of the foreground classes.
       * **scores** : A list of float arrays of shape :math:`(R,)`. \
           Each value indicates how confident the prediction is.

    """
    prepared_imgs = list()
    scales = list()
    for img in imgs:
        _, _, W = img.shape
        img = self.prepare(img.astype(np.float32))
        # Ratio between the prepared and the original image width.
        scale = img.shape[2] / W
        prepared_imgs.append(img)
        scales.append(scale)

    bboxes = list()
    labels = list()
    scores = list()
    for img, scale in zip(prepared_imgs, scales):
        img_var = chainer.Variable(
            self.xp.asarray(img[None]), volatile=chainer.flag.ON)
        H, W = img_var.shape[2:]
        roi_cls_locs, roi_scores, rois, _ = self.__call__(
            img_var, scale=scale, test=True)
        # We are assuming that batch size is 1.
        roi_cls_loc = roi_cls_locs.data
        roi_score = roi_scores.data
        roi = rois / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = self.xp.tile(self.xp.asarray(self.loc_normalize_mean),
                            self.n_class)
        std = self.xp.tile(self.xp.asarray(self.loc_normalize_std),
                           self.n_class)
        roi_cls_loc = (roi_cls_loc * std + mean).astype(np.float32)
        roi_cls_loc = roi_cls_loc.reshape(-1, self.n_class, 4)
        roi = self.xp.broadcast_to(roi[:, None], roi_cls_loc.shape)
        cls_bbox = loc2bbox(roi.reshape(-1, 4),
                            roi_cls_loc.reshape(-1, 4))
        cls_bbox = cls_bbox.reshape(-1, self.n_class * 4)

        # Clip bounding boxes to the original image.  ``cls_bbox`` holds
        # four coordinates per class, so every even column is an x value
        # and every odd column a y value.  Slicing with ``0::2`` / ``1::2``
        # clips all classes; ``slice(0, 4, 2)`` only reached the first one.
        cls_bbox[:, 0::2] = self.xp.clip(cls_bbox[:, 0::2], 0, W / scale)
        cls_bbox[:, 1::2] = self.xp.clip(cls_bbox[:, 1::2], 0, H / scale)

        prob = F.softmax(roi_score).data

        raw_cls_bbox = cuda.to_cpu(cls_bbox)
        raw_prob = cuda.to_cpu(prob)

        bbox, label, score = self._suppress(raw_cls_bbox, raw_prob)
        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)

    return bboxes, labels, scores
def __call__(self, input_x, t):
    """Compute the total squared-error detection loss for one batch.

    Args:
        input_x: Input ``Variable``; its ``volatile`` flag is propagated to
            every ``Variable`` created here.
        t: Per-image sequences of ground-truth boxes.  Each box is a mapping
            with ``"x"``, ``"y"``, ``"w"``, ``"h"`` and ``"label"`` entries.
            An image may have no boxes at all.

    Returns:
        The sum of the x, y, w, h, confidence and class-probability losses.
        The individual terms are also printed.
    """
    isVola = input_x.volatile
    output = self.predictor(input_x)
    batch_size, _, grid_h, grid_w = output.shape
    if self.predictor.train == True:
        self.seen += batch_size
    x, y, w, h, conf, prob = F.split_axis(
        F.reshape(
            output,
            (batch_size, self.predictor.n_boxes,
             self.predictor.n_classes + 5, grid_h, grid_w)),
        (1, 2, 3, 4, 5), axis=2)
    x = F.sigmoid(x)  # activation of x
    y = F.sigmoid(y)  # activation of y
    conf = F.sigmoid(conf)  # activation of conf
    prob = F.transpose(prob, (0, 2, 1, 3, 4))
    prob = F.softmax(prob)  # activation of the class probabilities

    # Prepare the training targets.
    # w and h are trained towards 0 (e^w and e^h approach 1, i.e. the
    # responsible box keeps a scale factor of 1).
    tw = np.zeros(w.shape, dtype=np.float32)
    th = np.zeros(h.shape, dtype=np.float32)
    # The activated x and y are trained towards 0.5.
    tx = np.tile(0.5, x.shape).astype(np.float32)
    ty = np.tile(0.5, y.shape).astype(np.float32)

    # The learning scale of boxes without an object centre is 0.1 while
    # training is still unstable, and 0 afterwards.
    if self.seen < self.unstable_seen:
        box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
    else:
        box_learning_scale = np.tile(0, x.shape).astype(np.float32)

    # The confidence target is 0 by default.  Boxes whose IoU exceeds the
    # threshold are not trained, and only the best box of a cell that
    # contains an object is pulled towards its true IoU.
    tconf = np.zeros(conf.shape, dtype=np.float32)
    conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

    # Anchors other than the best one are not trained: their target is
    # their own prediction, so the squared error is 0.
    tprob = prob.data.copy()

    # Compute the IoU between every predicted box and every truth box
    # (one image at a time).
    x_shift = Variable(
        np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape[1:]),
        volatile=isVola)
    y_shift = Variable(
        np.broadcast_to(
            np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1),
            y.shape[1:]),
        volatile=isVola)
    w_anchor = Variable(
        np.broadcast_to(
            np.reshape(np.array(self.anchors, dtype=np.float32)[:, 0],
                       (self.predictor.n_boxes, 1, 1, 1)),
            w.shape[1:]),
        volatile=isVola)
    h_anchor = Variable(
        np.broadcast_to(
            np.reshape(np.array(self.anchors, dtype=np.float32)[:, 1],
                       (self.predictor.n_boxes, 1, 1, 1)),
            h.shape[1:]),
        volatile=isVola)
    x_shift.to_gpu()
    y_shift.to_gpu()
    w_anchor.to_gpu()
    h_anchor.to_gpu()

    best_ious = []
    for batch in range(batch_size):
        n_truth_boxes = len(t[batch])
        box_x = (x[batch] + x_shift) * 1.0 / grid_w
        box_y = (y[batch] + y_shift) * 1.0 / grid_h
        box_w = F.exp(w[batch]) * w_anchor * 1.0 / grid_w
        box_h = F.exp(h[batch]) * h_anchor * 1.0 / grid_h

        ious = []
        for truth_index in range(n_truth_boxes):
            truth = t[batch][truth_index]
            truth_box_x = Variable(
                np.broadcast_to(np.array(truth["x"], dtype=np.float32),
                                box_x.shape),
                volatile=isVola)
            truth_box_y = Variable(
                np.broadcast_to(np.array(truth["y"], dtype=np.float32),
                                box_y.shape),
                volatile=isVola)
            truth_box_w = Variable(
                np.broadcast_to(np.array(truth["w"], dtype=np.float32),
                                box_w.shape),
                volatile=isVola)
            truth_box_h = Variable(
                np.broadcast_to(np.array(truth["h"], dtype=np.float32),
                                box_h.shape),
                volatile=isVola)
            truth_box_x.to_gpu()
            truth_box_y.to_gpu()
            truth_box_w.to_gpu()
            truth_box_h.to_gpu()
            ious.append(
                multi_box_iou(
                    Box(box_x, box_y, box_w, box_h),
                    Box(truth_box_x, truth_box_y, truth_box_w, truth_box_h)
                ).data.get())
        if ious:
            best_ious.append(np.max(np.array(ious), axis=0))
        else:
            # An image without truth boxes overlaps nothing.  Taking the
            # max of an empty array would raise, so record all-zero IoUs.
            best_ious.append(np.zeros(x.shape[1:], dtype=np.float32))
    best_ious = np.array(best_ious)

    # For anchors whose IoU exceeds the threshold, do not push conf down
    # to 0 (cells around a truth box keep their confidence as it is).
    overlapping = best_ious > self.thresh
    tconf[overlapping] = conf.data.get()[overlapping]
    conf_learning_scale[overlapping] = 0

    # Only for anchor boxes that contain an object, adjust x, y, w, h,
    # conf and prob individually.
    abs_anchors = self.anchors / np.array([grid_w, grid_h])
    for batch in range(batch_size):
        for truth_box in t[batch]:
            truth_w = int(float(truth_box["x"]) * grid_w)
            truth_h = int(float(truth_box["y"]) * grid_h)
            truth_n = 0
            best_iou = 0.0
            for anchor_index, abs_anchor in enumerate(abs_anchors):
                iou = box_iou(
                    Box(0, 0, float(truth_box["w"]), float(truth_box["h"])),
                    Box(0, 0, abs_anchor[0], abs_anchor[1]))
                if best_iou < iou:
                    best_iou = iou
                    truth_n = anchor_index

            # For the anchor that holds the object, pull the centre towards
            # the true coordinates instead of 0.5 and the scale towards the
            # true scale instead of 1, with a learning scale of 1.
            box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
            tx[batch, truth_n, :, truth_h, truth_w] = (
                float(truth_box["x"]) * grid_w - truth_w)
            ty[batch, truth_n, :, truth_h, truth_w] = (
                float(truth_box["y"]) * grid_h - truth_h)
            tw[batch, truth_n, :, truth_h, truth_w] = np.log(
                float(truth_box["w"]) * 1.0 / abs_anchors[truth_n][0])
            th[batch, truth_n, :, truth_h, truth_w] = np.log(
                float(truth_box["h"]) * 1.0 / abs_anchors[truth_n][1])
            tprob[batch, :, truth_n, truth_h, truth_w] = 0
            tprob[batch, int(truth_box["label"]), truth_n,
                  truth_h, truth_w] = 1

            # Measure the IoU of the current prediction.
            full_truth_box = Box(
                float(truth_box["x"]), float(truth_box["y"]),
                float(truth_box["w"]), float(truth_box["h"]))
            predicted_box = Box(
                (x[batch][truth_n][0][truth_h][truth_w].data.get()
                 + truth_w) * 1.0 / grid_w,
                (y[batch][truth_n][0][truth_h][truth_w].data.get()
                 + truth_h) * 1.0 / grid_h,
                np.exp(w[batch][truth_n][0][truth_h][truth_w].data.get())
                * abs_anchors[truth_n][0],
                np.exp(h[batch][truth_n][0][truth_h][truth_w].data.get())
                * abs_anchors[truth_n][1])
            predicted_iou = box_iou(full_truth_box, predicted_box)
            tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
            conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 10.0

    # Loss computation.
    tx = Variable(tx, volatile=isVola)
    ty = Variable(ty, volatile=isVola)
    tw = Variable(tw, volatile=isVola)
    th = Variable(th, volatile=isVola)
    tconf = Variable(tconf, volatile=isVola)
    tprob = Variable(tprob, volatile=isVola)
    box_learning_scale = Variable(box_learning_scale, volatile=isVola)
    conf_learning_scale = Variable(conf_learning_scale, volatile=isVola)
    tx.to_gpu()
    ty.to_gpu()
    tw.to_gpu()
    th.to_gpu()
    tconf.to_gpu()
    tprob.to_gpu()
    box_learning_scale.to_gpu()
    conf_learning_scale.to_gpu()

    x_loss = F.sum((tx - x) ** 2 * box_learning_scale) / 2.0
    y_loss = F.sum((ty - y) ** 2 * box_learning_scale) / 2.0
    w_loss = F.sum((tw - w) ** 2 * box_learning_scale) / 2.0
    h_loss = F.sum((th - h) ** 2 * box_learning_scale) / 2.0
    c_loss = F.sum((tconf - conf) ** 2 * conf_learning_scale) / 2.0
    p_loss = F.sum((tprob - prob) ** 2) / 2.0
    print("x_loss: %f  y_loss: %f  w_loss: %f  h_loss: %f  c_loss: %f   p_loss: %f" %
        (F.sum(x_loss).data, F.sum(y_loss).data, F.sum(w_loss).data, F.sum(h_loss).data, F.sum(c_loss).data, F.sum(p_loss).data)
    )

    loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss
    return loss
def _compute_batch_wer_mean(model, source_batch, target_batch, target_vocab_size, argmax=True):
    """Decode a batch and return the mean word error rate.

    Decoding starts from ``ID_GO`` and keeps appending one token per row
    until the sequences are twice as long as the target sequences.  Each
    row is then compared with its target after special tokens are removed.

    Args:
        model: Sequence-to-sequence model; attention models receive the
            encoder outputs and the padding mask at every decoding step.
        source_batch: Source token ids; ``ID_PAD`` marks padding.
        target_batch: Target token ids.
        target_vocab_size: Number of ids the decoder can sample from.
        argmax: Take the most likely token when true, otherwise sample from
            the predicted distribution.

    Returns:
        The sum of the per-row error rates divided by the number of rows.
    """
    xp = model.xp
    skip_mask = source_batch != ID_PAD
    batchsize = source_batch.shape[0]
    target_seq_length = target_batch.shape[1]

    # to gpu
    if xp is cuda.cupy:
        source_batch = cuda.to_gpu(source_batch)
        target_batch = cuda.to_gpu(target_batch)
        skip_mask = cuda.to_gpu(skip_mask)

    word_ids = xp.arange(0, target_vocab_size, dtype=xp.int32)

    model.reset_state()
    x = xp.broadcast_to(
        xp.asarray([[ID_GO]]).astype(xp.int32), (batchsize, 1))

    # get encoder's last hidden states
    attentive = isinstance(model, AttentiveSeq2SeqModel)
    if attentive:
        encoder_last_hidden_states, encoder_last_layer_outputs = model.encode(
            source_batch, skip_mask, test=True)
    else:
        encoder_last_hidden_states = model.encode(
            source_batch, skip_mask, test=True)

    max_length = target_seq_length * 2
    while x.shape[1] < max_length:
        if attentive:
            u = model.decode_one_step(
                x, encoder_last_hidden_states, encoder_last_layer_outputs,
                skip_mask, test=True)
        else:
            u = model.decode_one_step(
                x, encoder_last_hidden_states, test=True)
        p = F.softmax(u)  # convert to probability

        # append an empty column that receives the new tokens
        blank_column = xp.zeros((batchsize, 1), dtype=xp.int32)
        if xp is np:
            x = xp.append(x, blank_column, axis=1)
        else:
            x = xp.concatenate((x, blank_column), axis=1)

        for row in xrange(batchsize):
            distribution = p.data[row]
            if argmax:
                x[row, -1] = xp.argmax(distribution)
            else:
                x[row, -1] = xp.random.choice(
                    word_ids, size=1, p=distribution)[0]

    def strip_special_tokens(sequence):
        # Keep ids up to the first PAD/EOS and drop every GO.
        kept = []
        for raw in sequence:
            token_id = int(raw)  # to cpu
            if token_id == ID_PAD or token_id == ID_EOS:
                break
            if token_id == ID_GO:
                continue
            kept.append(token_id)
        return kept

    error_rates = []
    for row in xrange(batchsize):
        target_tokens = strip_special_tokens(target_batch[row, :])
        predict_tokens = strip_special_tokens(x[row])
        error_rates.append(
            compute_word_error_rate_of_sequence(target_tokens, predict_tokens))

    return sum(error_rates) / len(error_rates)
def forward_one_step(self, x_seq, pos, test=True, concat_weight=True, softmax=False):
    """Predict the token at column ``pos`` from the columns around it.

    The columns before ``pos`` are encoded with ``encode_backward`` and the
    columns after it with ``encode_forward``.  Both sides are attended, and
    the attention-weighted contexts are summed and passed through
    ``f_rg`` and ``reader_fc``.

    Args:
        x_seq: Batch of sequences; axis 1 is the position axis.
        pos: Column index of the token to predict.
        test: Forwarded to the encoders and to ``attend``.
        concat_weight: When true, return one normalised weight array with a
            column per input position instead of the raw attention pieces.
        softmax: When true (and ``concat_weight`` is true), apply a softmax
            to the prediction.

    Returns:
        ``(weight, prediction)`` when ``concat_weight`` is true, otherwise
        ``(former_attention_weight, latter_attention_weight, attention_sum,
        prediction)``.  Every element is ``None`` for an empty sequence.
    """
    self.reset_state()
    xp = self.xp
    length = x_seq.shape[1]
    if self.gpu:
        x_seq = cuda.to_gpu(x_seq)
    if length < 1:
        return (None, None) if concat_weight else (None, None, None, None)

    # Split the sequence around ``pos``; a side stays None when ``pos`` is
    # the first or the last column.
    former = x_seq[:, :pos] if pos != 0 else None
    if pos == 0 or pos != length - 1:
        latter = x_seq[:, pos + 1:]
    else:
        latter = None

    attention_sum = 0
    former_context = None
    latter_context = None
    former_attention_weight = None
    latter_attention_weight = None

    if former is not None:
        former_context, former_encode = self.encode_backward(former, test=test)
        former_attention_weight, former_attention_sum = self.attend(
            former_context, former_encode, test=test)
        attention_sum += former_attention_sum

    if latter is not None:
        latter_context, latter_encode = self.encode_forward(latter, test=test)
        latter_attention_weight, latter_attention_sum = self.attend(
            latter_context, latter_encode, test=test)
        attention_sum += latter_attention_sum

    # Attention-weighted sum of the contexts, former side first.
    representation = 0
    sides = (
        (former_context, former_attention_weight),
        (latter_context, latter_attention_weight),
    )
    for contexts, weights in sides:
        if contexts is None:
            continue
        for step in xrange(len(contexts)):
            representation += apply_attention(
                contexts[step], weights[step] / attention_sum)

    g = self.f_rg(representation)
    predicted_char_bef_softmax = self.reader_fc(g)

    if not concat_weight:
        return (former_attention_weight, latter_attention_weight,
                attention_sum, predicted_char_bef_softmax)

    batchsize = x_seq.shape[0]
    weight = xp.zeros((batchsize, length), dtype=xp.float32)
    offset = 0
    if former_attention_weight is not None:
        f_length = len(former_attention_weight)
        # The former weights are stored in reverse order.
        for i in xrange(f_length):
            offset = i
            weight[:, f_length - i - 1] = (
                former_attention_weight[i].data.reshape(-1))
        offset += 1
    if latter_attention_weight is not None:
        # The column at ``offset`` is left at zero.
        for i in xrange(len(latter_attention_weight)):
            weight[:, offset + i + 1] = (
                latter_attention_weight[i].data.reshape(-1))
    weight /= attention_sum.data
    if xp is not np:
        weight = cuda.to_cpu(weight)

    if softmax:
        return weight, F.softmax(predicted_char_bef_softmax)
    return weight, predicted_char_bef_softmax
def discriminate(self, y, z, apply_softmax=False):
    """Run the discriminator on a ``(y, z)`` pair.

    ``y`` and ``z`` are projected with ``merge_y`` and ``merge_z``, summed,
    passed through ``merge_bias`` and then through the discriminator.

    Args:
        y: Input for ``discriminator.merge_y``.
        z: Input for ``discriminator.merge_z``.
        apply_softmax: Return softmax probabilities instead of raw logits.

    Returns:
        The discriminator logits, or their softmax when requested.
    """
    disc = self.discriminator
    merged = disc.merge_bias(disc.merge_y(y) + disc.merge_z(z))
    logit = disc(merged)
    return functions.softmax(logit) if apply_softmax else logit
def predict(x):
    """Return softmax probabilities computed from the ``fc8`` output.

    Args:
        x: Batch passed to ``func`` as its ``'data'`` input.

    Returns:
        The softmax of the single ``fc8`` output returned by ``func``.
    """
    fetched = func(inputs={'data': x}, outputs=['fc8'], train=False)
    # Exactly one output is requested, so exactly one must come back.
    (logits,) = fetched
    probabilities = F.softmax(logits)
    return probabilities
def fwd(self, x):
    """Apply layer ``l1`` to ``x`` and return the softmax of the result."""
    logits = self.l1(x)
    return F.softmax(logits)
ksize=3, stride=1, pad=1) self.l3 = L.Linear(256, 10) # classify # conv1: 8x8 -> 8x8 # max_pooling_2d(1): 8x8 -> 4x4 # conv2: 4x4 -> 4x4 # max_pooling_2d(2): 4x4 -> 2x2 # 2x2 * 64ch = 256 def __call__(self, x): #h1 = F.max_pooling_2d(F.relu(self.conv1(x)), 2, 2) # 2x2 max-pooling, stride=2 h1 = F.max_pooling_2d(F.relu(self.conv1(x)), ksize=2, stride=2) h2 = F.max_pooling_2d(F.relu(self.conv2(h1)), ksize=2, stride=2) y = self.l3(h2) return y model = L.Classifier(MyChain(), lossfun=F.softmax_cross_entropy) chainer.serializers.load_npz('result/out.model', model) img = Image.open('test.png') img = img.convert('L') img = img.resize((8, 8)) img = 16.0 - np.asarray(img, dtype=np.float32) img = img[np.newaxis, np.newaxis, :, :] x = chainer.Variable(img) y = model.predictor(x) c = F.softmax(y).data.argmax() print(c)
def main():
    """Generate text with a trained RNN language model.

    The vocabulary is read from ``words.csv`` and the model weights from
    the ``--model`` file.  Generation starts from ``<bos>``.  Up to ten
    rounds of at most ``--length`` tokens are written to stdout, and a
    round ends early when ``<eos>`` is produced.  With a positive
    ``--sample`` value tokens are sampled from the predicted distribution,
    otherwise the most likely token is taken.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', '-m', type=str, required=True,
                        help='model data, saved by train_ptb.py')
    parser.add_argument('--primetext', '-p', type=str, default='',
                        help='base text data, used for text generation')
    parser.add_argument('--seed', '-s', type=int, default=50,
                        help='random seeds for text generation')
    parser.add_argument('--unit', '-u', type=int, default=650,
                        help='number of units')
    parser.add_argument('--sample', type=int, default=10,
                        help='negative value indicates NOT use random choice')
    parser.add_argument('--length', type=int, default=30,
                        help='length of the generated text')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    # Seed once.  Re-seeding inside the generation loop would override
    # --seed and make every draw use the same random number.
    np.random.seed(args.seed)
    chainer.config.train = False

    xp = cuda.cupy if args.gpu >= 0 else np

    # load vocabulary
    ivocab = load_vocab("words.csv")
    vocab = {c: i for i, c in enumerate(ivocab)}

    # should be same as n_units , described in train.py
    n_units = args.unit

    lm = train.RNNForLM(len(ivocab), n_units)
    model = L.Classifier(lm)

    serializers.load_npz(args.model, model)

    if args.gpu >= 0:
        cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    model.predictor.reset_state()

    # NOTE(review): --primetext is parsed but not used; generation always
    # starts from '<bos>'.
    primetext = '<bos>'
    if isinstance(primetext, six.binary_type):
        primetext = primetext.decode('utf-8')

    if primetext not in vocab:
        print('ERROR: Unfortunately ' + primetext + ' is unknown.')
        sys.exit(1)

    sys.stdout.write(primetext)
    prev_word = chainer.Variable(xp.array([vocab[primetext]], xp.int32))

    n_rounds = 10
    for _ in range(n_rounds):
        for _ in six.moves.range(args.length):
            prob = F.softmax(model.predictor(prev_word))
            if args.sample > 0:
                # Renormalise in float64 so the probabilities sum to one
                # within the tolerance np.random.choice requires.
                probability = cuda.to_cpu(prob.data)[0].astype(np.float64)
                probability /= np.sum(probability)
                index = np.random.choice(
                    range(len(probability)), p=probability)
            else:
                index = np.argmax(cuda.to_cpu(prob.data))

            if ivocab[index] == '<eos>':
                sys.stdout.write('<eos>\n\n')
                # Start the next round from the beginning-of-sentence token.
                prev_word = chainer.Variable(
                    xp.array([vocab["<bos>"]], dtype=xp.int32))
                break
            else:
                sys.stdout.write(ivocab[index])

            prev_word = chainer.Variable(xp.array([index], dtype=xp.int32))

    sys.stdout.write('\n')
def forward_one(x, target, label, hidden, prev_c, word_dict, train_flag):
    """Classify position ``target`` of ``x`` and return prediction and loss.

    Three feature groups are concatenated: a 12-dimensional dictionary
    match vector, character n-gram embeddings for every position of a
    context window, and character-type n-gram embeddings for the same
    window.  They go through hand-built gates and ``F.lstm`` before the
    output layer.

    Relies on the module-level names ``model``, ``window``, ``char2id``,
    ``char_type2id``, ``get_onehot`` and ``make_char_type``.

    Args:
        x: Input sequence; sliced for dictionary lookups and converted to a
            list of characters for the window features.
        target: Index in ``x`` of the position being classified.
        label: Gold label, converted with ``get_onehot``.
        hidden: Previous hidden state, concatenated with the features.
        prev_c: Previous LSTM cell state.
        word_dict: Container of known words, used for membership tests.
        train_flag: Only referenced by a commented-out dropout line.

    Returns:
        tuple: ``(predicted_label, loss)``.  The updated ``prev_c`` and
        ``hidden`` are not returned.
    """
    # make dict feature vector
    dict_vec = list()
    # L*: a dictionary word ends just before ``target``; R*: one starts at
    # ``target``; I*: one spans ``target``.  The digit is the word length,
    # with 4 meaning "four or more".
    L1 = L2 = L3 = L4 = R1 = R2 = R3 = R4 = I1 = I2 = I3 = I4 = 0
    for i in range(len(x[:target])):
        word_candidate = x[target - (i + 1):target]
        if word_candidate in word_dict:
            if len(word_candidate) == 1:
                L1 = 1
            elif len(word_candidate) == 2:
                L2 = 1
            elif len(word_candidate) == 3:
                L3 = 1
            else:
                L4 = 1
        # NOTE(review): indentation was lost in this copy; the break is
        # assumed to sit at loop level, capping the scan at 11 candidates.
        if i == 10:
            break
    for i in range(len(x[target:])):
        word_candidate = x[target:target + i + 1]
        if word_candidate in word_dict:
            if len(word_candidate) == 1:
                R1 = 1
            elif len(word_candidate) == 2:
                R2 = 1
            elif len(word_candidate) == 3:
                R3 = 1
            else:
                R4 = 1
        if i == 10:
            break
    # NOTE(review): when ``target - i`` is negative the slice start wraps
    # around to the end of ``x`` -- confirm this is harmless for targets
    # near the beginning of the sequence.
    for i in range(1, 6, 1):
        for j in range(1, 6, 1):
            word_candidate = x[target - i:target + j]
            if word_candidate in word_dict:
                if len(word_candidate) == 1:
                    I1 = 1
                elif len(word_candidate) == 2:
                    I2 = 1
                elif len(word_candidate) == 3:
                    I3 = 1
                else:
                    I4 = 1
    dict_vec = chainer.Variable(
        np.array([[L1, L2, L3, L4, R1, R2, R3, R4, I1, I2, I3, I4]],
                 dtype=np.float32))
    # dict_embed_vec = model.dict_embed(dict_vec)

    # make input window vector
    distance = window // 2
    s_num = 3 - 1 + window // 2
    char_vecs = list()
    char_type_vecs = list()
    x = list(x)
    # Pad both ends so every window position has two characters of left
    # context for its tri-gram.
    for i in range(s_num):
        x.append('</s>')
        x.insert(0, '<s>')
    for i in range(-distance, distance + 1):
        # make char vector
        # import char
        uni_gram = x[target + s_num + i]
        bi_gram = x[target + s_num - 1 + i] + x[target + s_num + i]
        tri_gram = x[target+s_num-2+i] + \
            x[target+s_num-1+i] + x[target+s_num+i]
        # char2id
        uni_gram_id = char2id[uni_gram]
        bi_gram_id = char2id[bi_gram]
        tri_gram_id = char2id[tri_gram]
        # id 2 embedding
        uni_gram_vec = model.embed(get_onehot(uni_gram_id))
        bi_gram_vec = model.embed(get_onehot(bi_gram_id))
        tri_gram_vec = model.embed(get_onehot(tri_gram_id))
        # add all char_vec
        char_vecs.append(uni_gram_vec)
        char_vecs.append(bi_gram_vec)
        char_vecs.append(tri_gram_vec)

        # make char type vector
        # import char type
        uni_gram_type = make_char_type(uni_gram)
        bi_gram_type = make_char_type(
            x[target + s_num - 1 + i]) + make_char_type(x[target + s_num + i])
        # NOTE(review): unlike ``tri_gram`` above, this skips position -1
        # and nests a make_char_type call inside another.  It looks like a
        # misplaced parenthesis, but the keys of ``char_type2id`` may have
        # been built the same way -- confirm before changing.
        tri_gram_type = make_char_type(
            x[target + s_num - 2 + i]) + make_char_type(x[target + s_num + i] + make_char_type(x[target + s_num - 2 + i]))
        # chartype 2 id
        uni_gram_type_id = char_type2id[uni_gram_type]
        bi_gram_type_id = char_type2id[bi_gram_type]
        tri_gram_type_id = char_type2id[tri_gram_type]
        # id 2 embedding
        uni_gram_type_vec = model.char_type_embed(get_onehot(uni_gram_type_id))
        bi_gram_type_vec = model.char_type_embed(get_onehot(bi_gram_type_id))
        tri_gram_type_vec = model.char_type_embed(get_onehot(tri_gram_type_id))
        # add all char_type_vec
        char_type_vecs.append(uni_gram_type_vec)
        char_type_vecs.append(bi_gram_type_vec)
        char_type_vecs.append(tri_gram_type_vec)

    char_concat = F.concat(tuple(char_vecs))
    char_type_concat = F.concat(tuple(char_type_vecs))
    #dropout_concat = F.dropout(concat, ratio=dropout_rate, train=train_flag)
    concat = F.concat((char_concat, char_type_concat))
    concat = F.concat((concat, hidden))
    i_gate = F.sigmoid(model.i_gate(concat))
    f_gate = F.sigmoid(model.f_gate(concat))
    o_gate = F.sigmoid(model.o_gate(concat))
    # The previous hidden state and the three gate activations form the
    # input that F.lstm splits internally.
    concat = F.concat((hidden, i_gate, f_gate, o_gate))
    prev_c, hidden = F.lstm(prev_c, concat)
    output = model.output(F.concat((hidden, dict_vec)))
    dist = F.softmax(output)
    #print(dist.data, label, np.argmax(dist.data))
    correct = get_onehot(label)
    #print(output.data, correct.data)
    return np.argmax(dist.data), F.softmax_cross_entropy(output, correct)
def predict(self, imgs):
    """Detect objects in each image.

    Every image is preprocessed with ``self.prepare``, passed through the
    network one at a time in inference mode, and the per-class boxes are
    decoded, clipped to the original image size and filtered by
    ``self._suppress``.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images. All images
            are in CHW and RGB format and the range of their value is
            :math:`[0, 255]`.

    Returns:
        tuple of lists: ``(bboxes, labels, scores)``.

        * **bboxes**: A list of float arrays of shape :math:`(R, 4)`,
          where :math:`R` is the number of bounding boxes in an image.
          Each bounding box is organized by
          :math:`(y_{min}, x_{min}, y_{max}, x_{max})` in the second axis.
        * **labels**: A list of integer arrays of shape :math:`(R,)`.
          Each value indicates the class of the bounding box.
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the
          number of the foreground classes.
        * **scores**: A list of float arrays of shape :math:`(R,)`.
          Each value indicates how confident the prediction is.
    """
    # Single pass over ``imgs``: remember the original (H, W) next to the
    # preprocessed array.
    prepared = []
    for raw in imgs:
        original_size = raw.shape[1:]
        prepared.append((self.prepare(raw.astype(np.float32)), original_size))

    bboxes, labels, scores = [], [], []
    for img, size in prepared:
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            img_var = chainer.Variable(self.xp.asarray(img[None]))
            scale = img_var.shape[3] / size[1]
            roi_cls_locs, roi_scores, rois, _ = self.__call__(
                img_var, scale=scale)

        # Batch size is assumed to be 1 from here on.
        xp = self.xp
        n_class = self.n_class
        loc = roi_cls_locs.array
        score = roi_scores.array
        roi = rois / scale  # back to the scale of the input image

        # Undo the normalisation of the regression targets.
        mean = xp.tile(xp.asarray(self.loc_normalize_mean), n_class)
        std = xp.tile(xp.asarray(self.loc_normalize_std), n_class)
        loc = (loc * std + mean).astype(np.float32)
        loc = loc.reshape((-1, n_class, 4))

        # Decode one box per (RoI, class) pair.
        roi = xp.broadcast_to(roi[:, None], loc.shape)
        cls_bbox = loc2bbox(roi.reshape((-1, 4)), loc.reshape((-1, 4)))
        cls_bbox = cls_bbox.reshape((-1, n_class * 4))

        # Clip y coordinates to the height and x coordinates to the width.
        cls_bbox[:, 0::2] = xp.clip(cls_bbox[:, 0::2], 0, size[0])
        cls_bbox[:, 1::2] = xp.clip(cls_bbox[:, 1::2], 0, size[1])

        prob = F.softmax(score).array

        bbox, label, prob = self._suppress(
            cuda.to_cpu(cls_bbox), cuda.to_cpu(prob))
        bboxes.append(bbox)
        labels.append(label)
        scores.append(prob)

    return bboxes, labels, scores
def predict_proba(self, data):
    """Return the softmax of ``self.forward(data, train=False)`` as raw data."""
    logits = self.forward(data, train=False)
    probabilities = F.softmax(logits)
    return probabilities.data
def encode_decode_train(self, in_word_list, out_word_list, train=True):
    """Encode a source sequence, decode it and accumulate the loss.

    The decoder is fed its own previously selected word at every step
    (starting from ``GO_ID``) and its output is scored against the next
    token of ``[GO_ID] + out_word_list + [EOS_ID]``.

    Args:
        in_word_list: Source tokens, passed to ``self.encode_list``.
        out_word_list: Target tokens (a list; it is concatenated with
            ``[GO_ID]`` and ``[EOS_ID]``).
        train: Forwarded to ``encode_list``, ``decode`` and ``select_word``.

    Returns:
        The summed softmax cross-entropy over all decoding steps. The same
        value is stored in ``self.loss`` and reported under ``"loss"``.
    """
    # Pick the array module that matches the configured device.
    xp = cuda.cupy if self.gpuid >= 0 else np
    self.reset_state()
    # Add GO_ID, EOS_ID to decoder input
    decoder_word_list = [GO_ID] + out_word_list + [EOS_ID]
    # encode list of words/tokens
    enc_states = self.encode_list(in_word_list, train=train)
    # initialize decoder LSTM to final encoder state
    self.set_decoder_state()
    # decode and compute loss
    # NOTE(review): both branches build the same variables; the only
    # difference is that the evaluation branch creates them inside
    # no_backprop_mode. The decoding loop below appears to run outside that
    # context -- confirm this is intended.
    if not train:
        with chainer.no_backprop_mode():
            # convert list of tokens into chainer variable list
            var_dec = (Variable(
                xp.asarray(decoder_word_list, dtype=np.int32).reshape(
                    (-1, 1))))
            # Initialise first decoded word to GOID
            pred_word = Variable(xp.asarray([GO_ID], dtype=np.int32))
    else:
        # convert list of tokens into chainer variable list
        var_dec = (Variable(
            xp.asarray(decoder_word_list, dtype=np.int32).reshape(
                (-1, 1))))
        # Initialise first decoded word to GOID
        pred_word = Variable(xp.asarray([GO_ID], dtype=np.int32))
    # compute loss
    self.loss = 0
    # decode tokens
    # The first entry (GO_ID) is skipped: it is the initial input, not a
    # prediction target.
    for next_word_var in var_dec[1:]:
        self.decode(pred_word, train=train)
        if self.attn == NO_ATTN:
            # No attention: project the last decoder layer's hidden state.
            predicted_out = self.out(self[self.lstm_dec[-1]].h)
        else:
            # __QUESTION Add attention
            #########################################################
            #### ATTN ####
            #########################################################
            # Dot-product score of the decoder state against every
            # encoder state, normalised with softmax.
            score = F.matmul(self[self.lstm_dec[-1]].h, enc_states.T)
            at = F.softmax(score)
            # Context vector: attention-weighted sum of the encoder states.
            ct = F.reshape(
                F.sum(F.scale(enc_states, at.T, axis=0), axis=0),
                (1, enc_states.shape[1]))
            # Combine context and decoder state before the output layer.
            av = F.tanh(
                self.avout(
                    F.concat((ct, self[self.lstm_dec[-1]].h), axis=1)))
            predicted_out = self.out(av)
        # compute loss
        prob = F.softmax(predicted_out)
        # The selected word becomes the decoder input of the next step.
        pred_word = self.select_word(prob, train=train, sample=False)
        '''
        ___QUESTION-1-DESCRIBE-E-START___
        Explain what loss is computed with an example
        What does this value mean?

        The model takes softmax cross entropy as loss.
        Cross-entropy has 2 characteristics: non-negative; the output will approximate to 0 if the actual.
        This makes it feasible as loss function.
        Its variant for softmax is used in the model which provides a measure of the difference between the predicted output and the actually expected output.
        The total loss is computed by accumulating the loss on each predicted word, reflecting the total level of error made in translation.
        '''
        self.loss += F.softmax_cross_entropy(predicted_out, next_word_var)
        '''___QUESTION-1-DESCRIBE-E-END___'''
    report({"loss": self.loss}, self)
    return self.loss
def _predict(self, *args):
    """Call the model on ``args`` in inference mode and return the softmax.

    The call runs with the ``train`` configuration set to ``False`` and
    with back-propagation disabled.
    """
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        logits = self(*args)
        return F.softmax(logits)
def predict(self, imgs):
    """Segment object instances from images.

    This method predicts instance-aware object regions for each image.
    Each image is preprocessed with ``self.prepare``, forwarded on its own
    in inference mode, and the resulting RoIs are filtered by size, merged
    by ``mask_voting`` and pasted into full-size boolean masks.

    Args:
        imgs (iterable of numpy.ndarray): Arrays holding images.
            All images are in CHW and RGB format and the range of their
            value is :math:`[0, 255]`.

    Returns:
        tuple of lists: ``(masks, labels, scores)``.

        * **masks**: A list of boolean arrays of shape :math:`(R, H, W)`,
          where :math:`R` is the number of masks in an image.
          Each pixel tells whether it is inside the object or not.
        * **labels**: A list of integer arrays of shape :math:`(R,)`.
          Each value indicates the class of the mask.
          Values are in range :math:`[0, L - 1]`, where :math:`L` is the
          number of the foreground classes.
        * **scores**: A list of float arrays of shape :math:`(R,)`.
          Each value indicates how confident the prediction is.
    """
    prepared_imgs = []
    sizes = []
    for img in imgs:
        # Remember the original (H, W) before preprocessing.
        size = img.shape[1:]
        img = self.prepare(img.astype(np.float32))
        prepared_imgs.append(img)
        sizes.append(size)

    masks = []
    labels = []
    scores = []

    for img, size in zip(prepared_imgs, sizes):
        with chainer.using_config('train', False), \
                chainer.function.no_backprop_mode():
            # inference
            img_var = chainer.Variable(self.xp.array(img[None]))
            scale = img_var.shape[3] / size[1]
            roi_ag_seg_scores, _, roi_cls_scores, bboxes, _ = \
                self.forward(img_var, scales=[scale])

        # We are assuming that batch size is 1.
        # Everything below works on host (NumPy) arrays.
        roi_ag_seg_score = chainer.cuda.to_cpu(roi_ag_seg_scores.array)
        roi_cls_score = chainer.cuda.to_cpu(roi_cls_scores.array)
        bbox = chainer.cuda.to_cpu(bboxes)

        # filter bounding boxes with min_size
        height = bbox[:, 2] - bbox[:, 0]
        width = bbox[:, 3] - bbox[:, 1]
        keep_indices = np.where((height >= self.min_drop_size) &
                                (width >= self.min_drop_size))[0]
        roi_ag_seg_score = roi_ag_seg_score[keep_indices, :, :]
        roi_cls_score = roi_cls_score[keep_indices]
        bbox = bbox[keep_indices, :]

        # scale bbox back to the original image and clip it to the image.
        # ``bbox`` already lives on the host, so it is clipped with NumPy
        # rather than with ``self.xp`` (which may be a GPU array module).
        bbox = bbox / scale
        bbox[:, 0::2] = np.clip(bbox[:, 0::2], 0, size[0])
        bbox[:, 1::2] = np.clip(bbox[:, 1::2], 0, size[1])

        # Softmax over axis 1, keeping channel 1.
        # presumably (n_roi, roi_size, roi_size) -- TODO confirm
        roi_seg_prob = F.softmax(roi_ag_seg_score).array[:, 1]
        roi_cls_prob = F.softmax(roi_cls_score).array

        roi_seg_prob, bbox, label, roi_cls_prob = mask_voting(
            roi_seg_prob, bbox, roi_cls_prob, size,
            self.score_thresh, self.nms_thresh,
            self.mask_merge_thresh, self.binary_thresh,
            limit=self.limit, bg_label=0)

        # The builtin ``bool`` replaces the ``np.bool`` alias, which was
        # removed from NumPy.
        mask = np.zeros(
            (len(roi_seg_prob), size[0], size[1]), dtype=bool)
        for i, (roi_seg_pb, bb) in enumerate(zip(roi_seg_prob, bbox)):
            bb = np.round(bb).astype(np.int32)
            y_min, x_min, y_max, x_max = bb
            # Resize the RoI-level probability map to the box size, then
            # binarise it and paste it into the full-size mask.
            roi_msk_pb = resize(
                roi_seg_pb.astype(np.float32)[None],
                (y_max - y_min, x_max - x_min))
            roi_msk = (roi_msk_pb > self.binary_thresh)[0]
            mask[i, y_min:y_max, x_min:x_max] = roi_msk

        masks.append(mask)
        labels.append(label)
        scores.append(roi_cls_prob)

    return masks, labels, scores
def _predict_class(self, x):
    """Return the softmax of the ``'pool4'`` output for input ``x``.

    ``self.func`` is called with ``x`` bound to ``'data'`` and
    ``train=False``; it must yield exactly one output.
    """
    outputs = self.func(inputs={'data': x}, outputs=['pool4'], train=False)
    (y,) = outputs
    return F.softmax(y)
# 文字の場所を計算 pos_x1 = int(x_pos[i] * (x2 - x1)) pos_y1 = int(y_pos * (y2 - y1)) pos_x2 = pos_x1 + int(w_pos * (x2 - x1)) pos_y2 = pos_y1 + int(h_pos * (y2 - y1)) # 文字の画像を切りだす char_img = crop_img[pos_y1:pos_y2, pos_x1:pos_x2] # 白黒画像にする char_gray = cv2.cvtColor(char_img, cv2.COLOR_BGR2GRAY) char_otsu = cv2.threshold(char_gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU) char_bin = char_otsu[1] # カラーモデルを変換してマスクをかける hsv_img = cv2.cvtColor(char_img, cv2.COLOR_BGR2HSV) mask_img = cv2.inRange(hsv_img, mask_color[0], mask_color[1]) char_img = cv2.bitwise_and(255 - char_bin, 255 - char_bin, mask=mask_img) # 画像認識用のサイズにする predict_img = cv2.resize(char_img, (64, 64)) cv2.imwrite("ch" + str(i) + ".jpg", predict_img) predict_batch.append(predict_img.reshape(1, 64, 64)) # 画像認識を行う predict_pixel = np.array(predict_batch, dtype=np.float32) with chainer.using_config('train', False): batch = F.softmax(ocr_net(predict_pixel)) # 画像認識の結果を表示する for i in range(len(batch.data)): index = np.argmax(batch[i].data) code = labels[index] ch = bytearray([code]).decode('sjis') print(ch, '(score:' + str(batch[i][index]) + ')')
def predict(self, x):
    """Return class probabilities for ``x``.

    The input goes through ``conv1`` + ReLU + max pooling (window 3),
    then ``l1`` + ReLU and ``l2``; the result is normalised with softmax.

    Args:
        x: Input batch accepted by ``self.conv1``.

    Returns:
        The raw array (``.data``) of the softmax output.
    """
    h1 = F.max_pooling_2d(F.relu(self.conv1(x)), 3)
    # No dropout here: it is a training-time regulariser and acts as the
    # identity at inference. Calling it in this method randomly zeroed
    # activations (making predictions non-deterministic) whenever the
    # train mode was active.
    h2 = F.relu(self.l1(h1))
    y = self.l2(h2)
    return F.softmax(y).data
def forward(self, x):
    """Classify ``x`` from convolutional features plus relative band power.

    Two feature sets are computed from the same input and concatenated
    before the last fully connected layer:

    * the output of the ten-layer convolution / pooling stack followed by
      two fully connected layers with normalisation and dropout,
    * the relative power of six frequency bands taken from the magnitude
      of the FFT of ``x`` along its last axis.

    Args:
        x: Input array; it is passed to ``cupy.fft.fft`` directly and
            indexed with four axes.

    Returns:
        The output of ``fc13`` when ``chainer.config.train`` is true,
        otherwise its softmax.
    """
    # --- convolutional branch -------------------------------------------
    h = F.relu(self.conv2(F.relu(self.conv1(x))))
    h = F.max_pooling_2d(h, (1, 2), stride=(1, 2))
    batch, channels, rows, cols = h.data.shape[:4]
    # Axes 1 and 2 swap sizes here via reshape (not via transpose).
    h = F.reshape(h, (batch, rows, channels, cols))
    h = F.relu(self.conv4(F.relu(self.conv3(h))))
    h = F.max_pooling_2d(h, (5, 3))
    h = F.relu(self.conv6(F.relu(self.conv5(h))))
    h = F.max_pooling_2d(h, (1, 2))
    h = F.relu(self.conv8(F.relu(self.conv7(h))))
    h = F.max_pooling_2d(h, (1, 2))
    h = F.relu(self.conv10(F.relu(self.conv9(h))))
    h = F.max_pooling_2d(h, (1, 2))

    # --- power-spectrum branch ------------------------------------------
    # FFT bins are 1.25 Hz apart (sampling rate 1000 Hz). Bands, in order:
    #   delta  1.25-3.75 Hz    theta  5-7.5 Hz      lalpha 8.75-10 Hz
    #   halpha 11.25-12.5 Hz   beta   13.75-30 Hz   lgamma 31.25-50 Hz
    band_bins = (
        slice(1, 4),
        slice(4, 7),
        slice(7, 9),
        slice(9, 11),
        slice(11, 25),
        slice(25, 41),
    )
    magnitude = cupy.abs(cupy.fft.fft(x))
    band_power = [
        cupy.average(magnitude[:, :, :, bins], axis=3) for bins in band_bins
    ]
    total_power = sum(band_power[1:], band_power[0])

    # Each band is expressed as a fraction of the six-band total.
    relative = cupy.zeros(
        (x.shape[0], x.shape[1], x.shape[2], len(band_bins)))
    for index, power in enumerate(band_power):
        relative[:, :, :, index] = cupy.divide(power, total_power)
    power_spectral = F.cast(chainer.Variable(relative), cupy.float32)

    # --- flatten both branches and merge --------------------------------
    h = F.reshape(h, (h.shape[0], h.shape[1] * h.shape[2] * h.shape[3]))
    power_spectral = F.reshape(
        power_spectral,
        (power_spectral.shape[0],
         power_spectral.shape[1] * power_spectral.shape[2]
         * power_spectral.shape[3]))

    h = F.dropout(F.relu(self.norm1(self.fc11(h))))
    h = F.dropout(F.relu(self.norm2(self.fc12(h))))

    # Concatenate the learned features with the relative power spectrum.
    h = self.fc13(F.concat((h, power_spectral), axis=1))
    return h if chainer.config.train else F.softmax(h)
def forward(self, x):
    """Return the softmax over ``out(lstm1(embed(x)))``."""
    embedded = self.embed(x)
    recurrent = self.lstm1(embedded)
    return F.softmax(self.out(recurrent))
def _bernoulli_softmax_crossentropy(self, x, y):
    """Return the negated sum of a two-term log-likelihood.

    The two terms are ``y * log_softmax(x)`` and
    ``(1 - y) * log(1 - softmax(x))``.

    x: prediction. unnormalized distribution.
    y: teacher
    """
    positive_term = y * F.log_softmax(x)
    negative_term = (1 - y) * F.log(1 - F.softmax(x))
    return -F.sum(positive_term + negative_term)
# NOTE(review): this is a fragment of a longer script; ``tmp``, ``x_test``,
# ``utt_ids_tst``, ``model``, ``args`` and ``logger`` are defined outside
# this excerpt.
x_train = numpy.array(tmp)
N_test = x_test.shape[0]
N_train = x_train.shape[0]
batchsize = 22

# Forward the training data in mini-batches with test=False; the outputs
# are discarded. Presumably this updates the model's batch-normalization
# statistics, as the log message suggests -- TODO confirm.
logger.info("Applying batch normalization")
for i in xrange(0, N_train, batchsize):
    x_batch = x_train[i:i + batchsize]
    model.forward(x_batch, test=False)

# Collect the softmax of the model output for every test item, one item
# at a time, moved to host memory.
logger.info("Extracting final layer")
save_to = args.save_to
X = []
for i in xrange(0, N_test):
    utt_id = utt_ids_tst[i]  # not used below
    x_batch = x_test[i:i + 1]
    X.append(
        cuda.to_cpu(F.softmax(model.forward(x_batch, test=True)).data))
# Each collected output has a leading axis of size 1 (one item per call);
# drop it so X has one row per test item.
X = numpy.asarray(X)[:, 0, :]

# Score pairwise cosine distances: distances of matching pairs versus
# distances of non-matching pairs.
logger.info("Calcurating average precision")
start_time = timeit.default_timer()
labels = swbd_utts_to_labels(utt_ids_tst)
distances = pdist(X, metric="cosine")
matches = samediff.generate_matches_array(labels)
# ``matches == True`` / ``== False`` are used as index masks.
# NOTE(review): presumably ``matches`` is a boolean array -- confirm.
ap, prb = samediff.average_precision(distances[matches == True],
                                     distances[matches == False])
end_time = timeit.default_timer()
logger.info("Average precision: %s (processing time: %f [sec])" %
            (str(ap), end_time - start_time))

# Persist the extracted outputs.
logger.info('Saving output layer to %s' % save_to + ".npz")
numpy.savez_compressed(save_to, X)
def _predict_class(self, x):
    """Return the softmax of the ``'loss3/classifier'`` output for ``x``.

    ``self.func`` is called with ``x`` bound to ``'data'``, the two
    auxiliary pooling layers disabled and ``train=False``; it must yield
    exactly one output.
    """
    outputs = self.func(
        inputs={'data': x},
        outputs=['loss3/classifier'],
        disable=['loss1/ave_pool', 'loss2/ave_pool'],
        train=False,
    )
    (y,) = outputs
    return F.softmax(y)