def get_probability(self, img_list):
    """Run inference on images given as an array, a path or a sequence of paths.

    Args:
        img_list (ndarray, list, tuple, str): An image array that is passed
            to the model as is, a sequence of image paths, or a single image
            path. Paths are loaded with ``load_img`` at ``self.imsize`` and
            passed through ``self.preprocess``.

    Returns:
        * Image array, or fewer than ``batch_size`` paths: the softmax
          output of the model as an ndarray.
        * Single path: the arg-max class index of that image.
        * ``batch_size`` or more paths: a list holding the arg-max class
          index of every image.

    NOTE(review): the last two cases return class indices rather than
    probabilities, which looks like a copy of ``predict``. They are kept
    unchanged so that existing callers keep working -- confirm the intent.
    """
    batch_size = 32
    self.set_models(inference=True)

    def load_batch(paths):
        # Each image gets a leading batch axis, then all are stacked on it.
        stacked = np.vstack([load_img(path, self.imsize)[None] for path in paths])
        return self.preprocess(stacked)

    if isinstance(img_list, str):
        img_array = load_batch([img_list])
        return np.argmax(rm.softmax(self(img_array)).as_ndarray(), axis=1)[0]

    # Tuples are accepted as well: the original outer check only let
    # ``list`` and ``str`` through, so tuples of paths were treated as arrays.
    if isinstance(img_list, (list, tuple)):
        paths = list(img_list)
        if len(paths) >= batch_size:
            test_dist = ImageDistributor(paths)
            results = []
            n_batches = int(np.ceil(len(test_dist) / batch_size))
            # The context manager closes the progress bar even on errors.
            with tqdm(total=n_batches) as bar:
                for x_img_list, _ in test_dist.batch(batch_size, shuffle=False):
                    img_array = load_batch(x_img_list)
                    results.extend(
                        np.argmax(rm.softmax(self(img_array)).as_ndarray(), axis=1))
                    bar.update(1)
            return results
        img_array = load_batch(paths)
    else:
        img_array = img_list
    return rm.softmax(self(img_array)).as_ndarray()
def predict(self, img_list):
    """Predict the class index of images given as array, path or paths.

    The model output is indexed with ``[1]`` before the softmax, i.e. the
    second element of what ``self(...)`` returns is used as the class scores.

    Args:
        img_list (ndarray, list, tuple, str): An image array that is passed
            to the model as is, a sequence of image paths, or a single image
            path. Paths are loaded with ``load_img`` at ``self.imsize`` and
            passed through ``self.preprocess``.

    Returns:
        A single class index when one path is given, otherwise an array
        with one class index per image.
    """
    self.set_models(inference=True)

    if isinstance(img_list, str):
        img_array = self.preprocess(load_img(img_list, self.imsize)[None])
        return np.argmax(rm.softmax(self(img_array)[1]).as_ndarray(), axis=1)[0]

    # Tuples are accepted as well: the original outer check only let
    # ``list`` and ``str`` through, so tuples of paths were treated as arrays.
    if isinstance(img_list, (list, tuple)):
        img_array = np.vstack(
            [load_img(path, self.imsize)[None] for path in img_list])
        img_array = self.preprocess(img_array)
    else:
        img_array = img_list
    return np.argmax(rm.softmax(self(img_array)[1]).as_ndarray(), axis=1)
def test_gpu_node_softmax(a):
    """Check that ``rm.softmax`` agrees with CUDA switched on and off.

    The summed softmax of ``a`` and its gradient with respect to the input
    variable are computed once with CUDA active and once with it disabled;
    both pairs are then compared with ``close``.
    """
    # Pass with CUDA enabled.
    set_cuda_active(True)
    variable = Variable(a)
    gpu_total = rm.sum(rm.softmax(variable))
    gpu_grad = gpu_total.grad().get(variable)
    gpu_total.to_cpu()

    # Same computation with CUDA disabled, reusing the same variable.
    set_cuda_active(False)
    cpu_total = rm.sum(rm.softmax(variable))
    cpu_grad = cpu_total.grad().get(variable)

    close(gpu_total, cpu_total)
    close(cpu_grad, gpu_grad)
def predict(self, src_seq, beam_width=10):
    """Translate ``src_seq`` with the encoder/decoder using beam search.

    Args:
        src_seq (sequence of str): Source words. Words missing from
            ``self.src_w2i`` are mapped to the ``'<unk>'`` index.
        beam_width (int): Number of hypotheses kept after every decoding
            step.

    Returns:
        list of str: The best scoring target sentence. It starts with
        ``'<bos>'`` and ends with ``'<eos>'`` unless the step limit was hit.

    Raises:
        ValueError: If ``src_seq`` is empty. Previously this surfaced as a
            ``NameError`` because the encoder loop never assigned a state.
    """
    if len(src_seq) == 0:
        raise ValueError("src_seq must contain at least one token")

    # The source sequence is fed to the encoder in reverse order.
    src_seq = src_seq[::-1]
    unk_index = self.src_w2i['<unk>']
    xi = [self.src_w2i.get(word, unk_index) for word in src_seq]  # word to index
    xi = np.array(xi).reshape(len(xi), 1)
    xe = self.l1(xi)  # index to vector (embedding)

    # Encode: only the value returned for the last token is kept.
    for x in xe:
        h = self.encode(x.reshape(1, -1))

    # Decode. Every hypothesis is (cumulative log-probability, words so far,
    # decoder state to restore before extending it).
    limit = 100
    step = 1
    beam = [(0, ['<bos>'], {'z': h, 'state': self.l2._state})]
    is_all_eos = False
    while not is_all_eos and step <= limit + 1:  # limit + 1 for '<eos>'
        cand = []
        is_all_eos = True
        for score, sentence, hidden in beam:
            # Restore the decoder state that belongs to this hypothesis.
            self.l4._z = hidden['z']
            self.l4._state = hidden['state']
            word = sentence[-1]
            if word == '<eos>':
                # Finished hypotheses are carried over unchanged.
                cand.append((score, sentence, hidden))
                continue

            is_all_eos = False
            yi = np.array([self.tar_w2i[word]]).reshape(1, -1)
            y = self.l3(yi).reshape(1, -1)
            log_p = rm.log(rm.softmax(self.decode(y))).as_ndarray()[0]
            # All extensions of this hypothesis share the state after decode.
            next_hidden = {'z': self.l4._z, 'state': self.l4._state}
            for i in range(self.tar_vocab_size):
                cand.append(
                    (score + log_p[i], sentence + [self.tar_i2w[i]], next_hidden))

        beam = sorted(cand, key=lambda tup: tup[0], reverse=True)[:beam_width]
        step += 1

    self.truncate()
    return beam[0][1]
def get_bbox(self, z, score_threshold=0.6, nms_threshold=0.45):
    """Convert raw network output into per-image lists of detected boxes.

    Args:
        z (ndarray, Node): Network output. It is split along axis 2 into
            the first 4 values (box location) and the remaining values
            (class scores). Objects exposing ``as_ndarray`` are converted
            first.
        score_threshold (float): Scores below this value are discarded.
        nms_threshold (float): Threshold passed on to ``nms``.

    Returns:
        The value returned by ``nms(result_bbox, nms_threshold)``, where
        ``result_bbox`` holds one list per image of dicts with the keys
        ``"box"``, ``"name"``, ``"class"`` and ``"score"``.
    """
    N = len(z)
    class_num = len(self.class_map)
    top_k = 100
    if hasattr(z, 'as_ndarray'):
        z = z.as_ndarray()

    loc, conf = np.split(z, [4], axis=2)
    loc = np.concatenate([self.decode_box(loc[n])[None] for n in range(N)], axis=0)
    loc = np.clip(loc, 0, 1)
    # Turn the last two values into an extent and move the first two by half
    # of it (corner pair -> centre/size, assuming decode_box yields corners).
    loc[:, :, 2:] = loc[:, :, 2:] - loc[:, :, :2]
    loc[:, :, :2] += loc[:, :, 2:] / 2.
    conf = rm.softmax(conf.transpose(0, 2, 1)).as_ndarray().transpose(0, 2, 1)

    result_bbox = []
    # Drop class 0 (presumably the background class) and zero out low scores.
    conf = conf[:, :, 1:]
    conf[conf < score_threshold] = 0

    # Transposes are required to handle the tensors in `class major` order.
    # (N, box, class) => (N, class, box)
    sorted_conf_index = np.argsort(-conf, axis=1)  # Arg sort in descending order.
    # The rank of every box within its class; keep the ``top_k`` best.
    keep_index = (np.argsort(sorted_conf_index, axis=1) < top_k).transpose(0, 2, 1)
    conf = conf.transpose(0, 2, 1)
    conf = conf[keep_index].reshape(N, class_num, -1)
    # ``bool`` replaces the ``np.bool`` alias that NumPy 1.24 removed.
    loc = np.concatenate([
        loc[(keep_index[:, c, :].reshape(N, -1, 1) *
             np.ones_like(loc)).astype(bool)].reshape(N, 1, -1, 4)
        for c in range(class_num)
    ], axis=1)

    for n in range(N):
        nth_result = []
        nth_loc = loc[n]
        for ndind in np.ndindex(*conf.shape[1:]):
            if conf[n, ndind[0], ndind[1]] < score_threshold:
                continue
            nth_result.append({
                "box": nth_loc[ndind[0], ndind[1]].tolist(),
                "name": self.class_map[ndind[0]].decode('utf-8'),
                "class": int(ndind[0]),
                "score": float(conf[n, ndind[0], ndind[1]])
            })
        result_bbox.append(nth_result)

    ret = nms(result_bbox, nms_threshold)
    return ret
def forward(self, x):
    """Performs forward propagation.

    This function can be called using the ``__call__`` method.
    See the following example of method usage.

    Args:
        x (ndarray, Node): Input image as a tensor.

    Returns:
        (Node): Returns the raw output of yolo v2.
        You can reform it to bounding box form using the method ``get_bbox``.

    Example:
        >>> import numpy as np
        >>> from renom_img.api.detection.yolo_v2 import Yolov2
        >>>
        >>> x = np.random.rand(1, 3, 224, 224)
        >>> class_map = ["dog", "cat"]
        >>> model = Yolov2(class_map)
        >>> y = model.forward(x) # Forward propagation.
        >>> y = model(x)  # Same as above result.
        >>>
        >>> bbox = model.get_bbox(y)  # The output can be reformed using get_bbox method.
    """
    assert len(self.class_map) > 0, \
        "Class map is empty. Please set the attribute class_map when instantiate model class. " +\
        "Or, please load already trained model using the method 'load()'."
    assert self.num_anchor > 0, \
        "Anchor list is empty. Please calculate anchor list using create_anchor function, before instantiate model class. " +\
        "Or, please load already trained model using the method 'load()'."

    backbone = self._freezed_network
    backbone.set_auto_update(self.train_whole_network)
    # The backbone runs in inference mode unless the whole network is being
    # trained and no ``inference`` attribute is set to a true value.
    run_as_inference = not self.train_whole_network or getattr(self, 'inference', False)
    backbone.set_models(inference=run_as_inference)

    h, f = backbone(x)
    f = self._conv21(f)
    h = self._conv1(h)
    # Four stride-2 sub-samplings of ``f`` are concatenated and joined to ``h``.
    sub_grids = [f[:, :, i::2, j::2] for i in range(2) for j in range(2)]
    h = self._conv2(rm.concat(h, rm.concat(sub_grids)))
    out = self._last(h)

    # Create yolo format.
    N, _, H, W = h.shape
    reshaped = out.reshape(N, self.num_anchor, -1, W * H)

    def activate(fn, start, stop):
        # Apply ``fn`` to one slice of axis 2 and swap axes 1 and 2.
        return fn(reshaped[:, :, start:stop]).transpose(0, 2, 1, 3)

    conf = activate(rm.sigmoid, 0, 1)
    px = activate(rm.sigmoid, 1, 2)
    py = activate(rm.sigmoid, 2, 3)
    pw = activate(rm.exp, 3, 4)
    ph = activate(rm.exp, 4, 5)
    # For the class scores the transpose happens before the softmax.
    cl = rm.softmax(reshaped[:, :, 5:].transpose(0, 2, 1, 3))

    merged = rm.concat(conf, px, py, pw, ph, cl)
    return merged.transpose(0, 2, 1, 3).reshape(N, -1, H, W)
def predict(self, img_list):
    """Perform prediction.

    Argument can be an image array, image path list or a image path.

    Args:
        img_list (ndarray, list, tuple, str): Image array, sequence of image
            paths or a single image path. Paths are loaded with ``load_img``
            at ``self.imsize`` and passed through ``self.preprocess``; an
            array is passed to the model as is.

    Return:
        A single class index when one path is given, a list of class
        indices when ``batch_size`` or more paths are given, otherwise an
        array with the class index of each image.
    """
    batch_size = 32
    self.set_models(inference=True)

    def load_batch(paths):
        # Each image gets a leading batch axis, then all are stacked on it.
        stacked = np.vstack([load_img(path, self.imsize)[None] for path in paths])
        return self.preprocess(stacked)

    if isinstance(img_list, str):
        img_array = load_batch([img_list])
        return np.argmax(rm.softmax(self(img_array)).as_ndarray(), axis=1)[0]

    # Tuples are accepted as well: the original outer check only let
    # ``list`` and ``str`` through, so tuples of paths were treated as arrays.
    if isinstance(img_list, (list, tuple)):
        paths = list(img_list)
        if len(paths) >= batch_size:
            test_dist = ImageDistributor(paths)
            results = []
            n_batches = int(np.ceil(len(test_dist) / batch_size))
            # The context manager closes the progress bar even on errors.
            with tqdm(total=n_batches) as bar:
                for x_img_list, _ in test_dist.batch(batch_size, shuffle=False):
                    img_array = load_batch(x_img_list)
                    results.extend(
                        np.argmax(rm.softmax(self(img_array)).as_ndarray(), axis=1))
                    bar.update(1)
            return results
        img_array = load_batch(paths)
    else:
        img_array = img_list
    return np.argmax(rm.softmax(self(img_array)).as_ndarray(), axis=1)
def predict(self, img_list):
    """Perform prediction on an image array, an image path or several paths.

    Args:
        img_list (ndarray, list, tuple, str): Image array, sequence of image
            paths or a single image path. Paths are loaded with ``load_img``
            at ``self.imsize`` and passed through ``self.preprocess``; an
            array is passed to the model as is.

    Returns:
        (Numpy.array or list): If only an image or a path is given, an array whose shape is **(width, height)** is returned.
        If multiple images or paths are given, then a list in which there are arrays whose shape is **(width, height)** is returned.
    """
    batch_size = 32
    self.set_models(inference=True)

    def load_batch(paths):
        # Each image gets a leading batch axis, then all are stacked on it.
        stacked = np.vstack([load_img(path, self.imsize)[None] for path in paths])
        return self.preprocess(stacked)

    if isinstance(img_list, str):
        img_array = load_batch([img_list])
        return np.argmax(rm.softmax(self(img_array)).as_ndarray(), axis=1)[0]

    # Tuples are accepted as well: the original outer check only let
    # ``list`` and ``str`` through, so tuples of paths were treated as arrays.
    if isinstance(img_list, (list, tuple)):
        paths = list(img_list)
        if len(paths) >= batch_size:
            test_dist = ImageDistributor(paths)
            results = []
            n_batches = int(np.ceil(len(test_dist) / batch_size))
            # The total is handed to the constructor and the context manager
            # closes the progress bar even on errors.
            with tqdm(total=n_batches) as bar:
                for x_img_list, _ in test_dist.batch(batch_size, shuffle=False):
                    img_array = load_batch(x_img_list)
                    results.extend(
                        np.argmax(rm.softmax(self(img_array)).as_ndarray(), axis=1))
                    bar.update(1)
            return results
        img_array = load_batch(paths)
    else:
        img_array = img_list
    return np.argmax(rm.softmax(self(img_array)).as_ndarray(), axis=1)
def yolo_predict(model, input_x):
    """Run ``model`` on ``input_x`` and decode its output into box tensors.

    The output is reshaped to
    ``(batch, model.bbox, model.classes + 5, grid_h, grid_w)``; along axis 2
    the entries are x, y, w, h, confidence and then the class scores.

    Args:
        model: Callable model exposing ``bbox``, ``classes`` and ``anchors``.
        input_x: Input passed to ``model``.

    Returns:
        tuple: ``(box_x, box_y, box_w, box_h, conf, prob)``. Box centres and
        sizes are divided by the grid size.
    """
    output = model(input_x)
    batch_size, _, grid_h, grid_w = output.shape
    grid_shape = (batch_size, model.bbox, model.classes + 5, grid_h, grid_w)
    output_reshape = np.reshape(output, grid_shape)

    x = rm.sigmoid(output_reshape[:, :, 0:1, :, :])  # activation of x
    y = rm.sigmoid(output_reshape[:, :, 1:2, :, :])  # activation of y
    w = output_reshape[:, :, 2:3, :, :]
    h = output_reshape[:, :, 3:4, :, :]
    conf = rm.sigmoid(output_reshape[:, :, 4:5, :, :])  # activation of conf

    # Activation of the class probabilities: the class axis is moved to
    # axis 1 for the softmax and moved back afterwards.
    swap_anchor_and_class = (0, 2, 1, 3, 4)
    prob = np.transpose(output_reshape[:, :, 5:, :, :], swap_anchor_and_class)
    prob = rm.softmax(prob)
    prob = np.transpose(prob, swap_anchor_and_class)

    # Convert x, y, w, h to absolute coordinates.
    anchors = np.array(model.anchors, dtype=np.float32)
    anchor_shape = (model.bbox, 1, 1, 1)
    x_shift = np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape)
    y_shift = np.broadcast_to(
        np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1), y.shape)
    w_anchor = np.broadcast_to(np.reshape(anchors[:, 0], anchor_shape), w.shape)
    h_anchor = np.broadcast_to(np.reshape(anchors[:, 1], anchor_shape), h.shape)

    box_x = (x + x_shift) / grid_w
    box_y = (y + y_shift) / grid_h
    box_w = np.exp(w) * w_anchor / grid_w
    box_h = np.exp(h) * h_anchor / grid_h
    return box_x, box_y, box_w, box_h, conf, prob
def func(node, x):
    """Return the cross entropy between the softmax of ``node`` and ``x``."""
    probabilities = rm.softmax(node)
    return rm.cross_entropy(probabilities, x)
def predict(self, x):
    """Predict a label for every row of ``x``.

    The arg-max of the network's softmax output along axis 1 is looked up in
    ``self.lb.classes_``.

    Returns:
        ndarray: The looked-up label of every sample.
    """
    scores = rm.softmax(self.network(x)).as_ndarray()
    class_indices = np.argmax(scores, axis=1)
    labels = []
    for class_index in class_indices:
        labels.append(self.lb.classes_[class_index])
    return np.array(labels)
def run(self):
    """Train ``self.model`` for ``self.total_epoch`` epochs with validation.

    Every epoch trains on ``self.train_dist``, validates on
    ``self.valid_dist``, computes task specific metrics (classification,
    detection or segmentation), keeps the best result seen so far and saves
    the last model. ``self.stop_event`` is polled between the steps; when it
    is set, ``self.updated`` is set to True and the method returns early.

    Progress is reported through attributes (``nth_epoch``, ``nth_batch``,
    ``last_batch_loss``, ``train_loss_list``, ``valid_loss_list``,
    ``best_epoch_valid_result``, ...) and the ``sync_*`` methods.
    """
    def stop_requested():
        # Flag the change for observers before the caller bails out.
        if self.stop_event.is_set():
            self.updated = True
            return True
        return False

    def loss_to_float(loss_node):
        # The loss may come back as a 0-d or as a one-element array; taking
        # the first flattened value covers both without a bare ``except``.
        return float(np.asarray(loss_node.as_ndarray()).ravel()[0])

    def keep_if_best(metric_name, metric_value, result):
        # The first result is always kept; afterwards a result replaces the
        # stored one when its metric is at least as good.
        best = self.best_epoch_valid_result
        if not best or best[metric_name] <= metric_value:
            self.best_valid_changed = True
            self.save_best_model()
            self.best_epoch_valid_result = result

    model = self.model
    self.state = State.STARTED
    self.running_state = RunningState.TRAINING
    if self.task_id == Task.DETECTION.value:
        valid_target = self.valid_dist.get_resized_annotation_list(self.imsize)

    if stop_requested():
        return

    for e in range(self.total_epoch):
        release_mem_pool()
        self.nth_epoch = e
        if stop_requested():
            return

        # ---- Training ----
        model.set_models(inference=False)
        temp_train_batch_loss_list = []
        self.running_state = RunningState.TRAINING
        self.sync_state()

        for b, (train_x, train_y) in enumerate(
                self.train_dist.batch(self.batch_size), 1):
            # For Yolov2 the memory pool is released every 10 batches.
            if isinstance(self.model, Yolov2) and (b - 1) % 10 == 0 and (b - 1):
                release_mem_pool()

            self.nth_batch = b
            if stop_requested():
                return

            if len(train_x) > 0:
                with model.train():
                    loss = model.loss(model(train_x), train_y)
                    reg_loss = loss + model.regularize()

                # The reported loss excludes the regularization term.
                loss = loss_to_float(loss)
                temp_train_batch_loss_list.append(loss)
                self.last_batch_loss = loss
                self.sync_batch_result()

                if stop_requested():
                    return

                reg_loss.grad().update(
                    model.get_optimizer(
                        current_loss=loss,
                        current_epoch=e,
                        total_epoch=self.total_epoch,
                        current_batch=b - 1,
                        total_batch=self.total_batch,
                        avg_valid_loss_list=self.valid_loss_list))

            # Thread value changed.
            self.updated = True

        self.train_loss_list.append(np.mean(temp_train_batch_loss_list))
        self.sync_train_loss()
        self.updated = True

        # ---- Validation ----
        release_mem_pool()
        self.running_state = RunningState.VALIDATING
        self.sync_state()

        # Detection targets were fetched once before the epoch loop.
        if self.task_id != Task.DETECTION.value:
            valid_target = []
        valid_prediction = []
        temp_valid_batch_loss_list = []
        model.set_models(inference=True)

        for valid_x, valid_y in self.valid_dist.batch(self.batch_size, shuffle=False):
            if stop_requested():
                return

            valid_prediction_in_batch = model(valid_x)
            loss = model.loss(valid_prediction_in_batch, valid_y)
            if self.task_id == Task.CLASSIFICATION.value:
                valid_prediction.append(
                    rm.softmax(valid_prediction_in_batch).as_ndarray())
            else:
                valid_prediction.append(valid_prediction_in_batch.as_ndarray())

            if self.task_id != Task.DETECTION.value:
                valid_target.append(valid_y)

            temp_valid_batch_loss_list.append(loss_to_float(loss))

        self.valid_loss_list.append(np.mean(temp_valid_batch_loss_list))
        self.sync_valid_loss()

        if stop_requested():
            return

        valid_prediction = np.concatenate(valid_prediction, axis=0)
        if self.task_id != Task.DETECTION.value:
            valid_target = np.concatenate(valid_target, axis=0)
        n_valid = min(len(valid_prediction), len(valid_target))

        # ---- Metrics, depending on the task ----
        loss = self.valid_loss_list[-1]
        if self.task_id == Task.CLASSIFICATION.value:
            pred = np.argmax(valid_prediction, axis=1)
            targ = np.argmax(valid_target, axis=1)
            _, pr, _, rc, _, f1 = precision_recall_f1_score(pred, targ)
            prediction = [{
                "score": [float(vc) for vc in v],
                "class": float(p)
            } for v, p in zip(valid_prediction, pred)]
            keep_if_best("f1", f1, {
                "nth_epoch": e,
                "prediction": prediction,
                "recall": float(rc),
                "precision": float(pr),
                "f1": float(f1),
                "loss": float(loss)
            })
            self.sync_best_valid_result()

        elif self.task_id == Task.DETECTION.value:
            prediction_box = model.get_bbox(valid_prediction[:n_valid])
            prec, rec, _, iou = get_prec_rec_iou(prediction_box, valid_target[:n_valid])
            _, mAP = get_ap_and_map(prec, rec)
            keep_if_best("mAP", mAP, {
                "nth_epoch": e,
                "prediction": prediction_box,
                "mAP": float(mAP),
                "IOU": float(iou),
                "loss": float(loss)
            })
            self.sync_best_valid_result()

        elif self.task_id == Task.SEGMENTATION.value:
            pred = np.argmax(valid_prediction, axis=1)
            targ = np.argmax(valid_target, axis=1)
            _, pr, _, rc, _, f1, _, _, _, _ = \
                get_segmentation_metrics(pred, targ, n_class=len(self.class_map))

            prediction = []
            for p, t in zip(pred, targ):
                # Per-image precision/recall, computed on a batch of one.
                lep, lemp, ler, lemr, _, _, _, _, _, _ = get_segmentation_metrics(
                    p[None], t[None], n_class=len(self.class_map))
                prediction.append({
                    # ``int`` replaces the ``np.int`` alias removed in NumPy 1.24.
                    "class": p.astype(int).tolist(),
                    "recall": {k: float(v) for k, v in ler.items()},
                    "precision": {k: float(v) for k, v in lep.items()},
                })
            keep_if_best("f1", f1, {
                "nth_epoch": e,
                "prediction": prediction,
                "recall": float(rc),
                "precision": float(pr),
                "f1": float(f1),
                "loss": float(loss)
            })
            self.sync_best_valid_result()

        # Thread value changed.
        self.save_last_model()
        self.updated = True
def predict(self, x):
    """Return the softmax of ``self.predictor`` applied to ``x``."""
    return rm.softmax(self.predictor(x))
def _oper_cpu(cls, output, t, bbox, classes, init_anchors):
    """Compute the YOLO-style loss node and its hand-written deltas on CPU.

    Args:
        output: Network output; it is reshaped to
            ``(batch, bbox, classes + 5, grid_h, grid_w)`` where axis 2
            holds x, y, w, h, confidence and then the class scores.
        t: Ground truth; ``t[i]`` is the sequence of boxes of image ``i``,
            each a mapping with the keys ``"x"``, ``"y"``, ``"w"``, ``"h"``
            and ``"label"``. Coordinates appear to be relative to the image
            (x and y are multiplied by the grid size to find the cell).
        bbox (int): Number of anchor boxes per grid cell.
        classes (int): Number of classes.
        init_anchors: Anchor sizes as ``(w, h)`` pairs, or ``None`` to use
            the built-in defaults.

    Returns:
        The node created by ``cls._create_node(loss)`` with
        ``attrs._output`` set to ``output`` and ``attrs._deltas`` set to the
        deltas reshaped to the shape of ``output``.
    """
    batch_size, _, grid_h, grid_w = output.shape
    output_reshape = rm.reshape(
        output, (batch_size, bbox, classes + 5, grid_h, grid_w))
    x = rm.sigmoid(output_reshape[:, :, 0:1, :, :])
    y = rm.sigmoid(output_reshape[:, :, 1:2, :, :])
    w = output_reshape[:, :, 2:3, :, :]
    h = output_reshape[:, :, 3:4, :, :]
    conf = rm.sigmoid(output_reshape[:, :, 4:5, :, :])
    prob = output_reshape[:, :, 5:, :, :]
    # Softmax over the class axis, then back to (batch, class, bbox, h, w).
    prob = rm.transpose(prob, (0, 2, 1, 3, 4)).reshape(batch_size, classes, -1)
    prob = rm.softmax(prob)
    prob = rm.reshape(prob, (batch_size, classes, bbox, grid_h, grid_w))
    deltas = np.zeros(output_reshape.shape, dtype=np.float32)

    # Anchors
    if init_anchors is None:
        anchors = [[5.375, 5.03125], [5.40625, 4.6875], [2.96875, 2.53125],
                   [2.59375, 2.78125], [1.9375, 3.25]]
    else:
        anchors = init_anchors
    thresh = 0.7

    # Target (teacher) data. Cells without an object get neutral targets
    # and small learning scales.
    tw = np.ones(w.shape, dtype=np.float32)
    th = np.ones(h.shape, dtype=np.float32)
    tx = np.tile(0.5, x.shape).astype(np.float32)
    ty = np.tile(0.5, y.shape).astype(np.float32)
    box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
    tconf = np.zeros(conf.shape, dtype=np.float32)
    conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)
    tprob = prob.as_ndarray()

    x_shift = np.broadcast_to(np.arange(grid_w, dtype=np.float32), x.shape[1:])
    y_shift = np.broadcast_to(
        np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1), y.shape[1:])
    anchor_array = np.array(anchors, dtype=np.float32)
    w_anchor = np.broadcast_to(
        np.reshape(anchor_array[:, 0], (bbox, 1, 1, 1)), w.shape[1:])
    h_anchor = np.broadcast_to(
        np.reshape(anchor_array[:, 1], (bbox, 1, 1, 1)), h.shape[1:])

    # Best IoU of every predicted box against all truth boxes of its image.
    best_ious = []
    for batch in range(batch_size):
        if len(t[batch]) == 0:
            # No ground truth: every prediction has IoU 0. Without this,
            # np.max over an empty array raises ValueError.
            best_ious.append(np.zeros(conf.shape[1:], dtype=np.float32))
            continue

        box_x = (x[batch] + x_shift) / grid_w
        box_y = (y[batch] + y_shift) / grid_h
        box_w = np.exp(w[batch]) * w_anchor / grid_w
        box_h = np.exp(h[batch]) * h_anchor / grid_h

        ious = []
        for truth_box in t[batch]:
            truth_box_x = np.broadcast_to(
                np.array(truth_box["x"], dtype=np.float32), box_x.shape)
            truth_box_y = np.broadcast_to(
                np.array(truth_box["y"], dtype=np.float32), box_y.shape)
            truth_box_w = np.broadcast_to(
                np.array(truth_box["w"], dtype=np.float32), box_w.shape)
            truth_box_h = np.broadcast_to(
                np.array(truth_box["h"], dtype=np.float32), box_h.shape)
            ious.append(
                multi_box_iou(
                    Box(box_x, box_y, box_w, box_h),
                    Box(truth_box_x, truth_box_y, truth_box_w, truth_box_h)))
        ious = np.array(ious)
        best_ious.append(np.max(ious, axis=0))
    best_ious = np.array(best_ious)

    # Predictions that already overlap a truth box well are not pushed
    # towards zero confidence.
    confident = best_ious > thresh
    tconf[confident] = conf[confident]
    conf_learning_scale[confident] = 0

    abs_anchors = anchors / np.array([grid_w, grid_h])
    for batch in range(batch_size):
        for truth_box in t[batch]:
            # Grid cell holding the box centre: x selects the column
            # (width axis), y the row (height axis). The original code had
            # the two swapped. The cell is clamped so that a coordinate of
            # exactly 1.0 stays inside the grid.
            truth_w = min(int(float(truth_box["x"]) * grid_w), grid_w - 1)
            truth_h = min(int(float(truth_box["y"]) * grid_h), grid_h - 1)

            # Pick the anchor whose shape matches the truth box best.
            truth_n = 0
            best_iou = 0.0
            for anchor_index, abs_anchor in enumerate(abs_anchors):
                iou = box_iou(
                    Box(0, 0, float(truth_box["w"]), float(truth_box["h"])),
                    Box(0, 0, abs_anchor[0], abs_anchor[1]))
                if best_iou < iou:
                    best_iou = iou
                    truth_n = anchor_index

            box_learning_scale[batch, truth_n, :, truth_h, truth_w] = 1.0
            tx[batch, truth_n, :, truth_h, truth_w] = \
                float(truth_box["x"]) * grid_w - truth_w
            ty[batch, truth_n, :, truth_h, truth_w] = \
                float(truth_box["y"]) * grid_h - truth_h
            tw[batch, truth_n, :, truth_h, truth_w] = \
                float(truth_box["w"]) / abs_anchors[truth_n][0]
            th[batch, truth_n, :, truth_h, truth_w] = \
                float(truth_box["h"]) / abs_anchors[truth_n][1]
            tprob[batch, :, truth_n, truth_h, truth_w] = 0
            tprob[batch, int(truth_box["label"]), truth_n, truth_h, truth_w] = 1

            # The confidence target is the IoU of the current prediction.
            full_truth_box = Box(
                float(truth_box["x"]), float(truth_box["y"]),
                float(truth_box["w"]), float(truth_box["h"]))
            predicted_box = Box(
                (x[batch, truth_n, 0, truth_h, truth_w] + truth_w) / grid_w,
                (y[batch, truth_n, 0, truth_h, truth_w] + truth_h) / grid_h,
                np.exp(w[batch, truth_n, 0, truth_h, truth_w]) * abs_anchors[truth_n][0],
                np.exp(h[batch, truth_n, 0, truth_h, truth_w]) * abs_anchors[truth_n][1])
            predicted_iou = box_iou(full_truth_box, predicted_box)
            tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
            conf_learning_scale[batch, truth_n, :, truth_h, truth_w] = 5.0

    # Losses and their deltas with respect to the raw network output.
    x_loss = np.sum((tx - x)**2 * box_learning_scale) / 2
    deltas[:, :, 0:1, :, :] = \
        ((x - tx) * box_learning_scale * (1 - x) * x).as_ndarray() * 10
    y_loss = np.sum((ty - y)**2 * box_learning_scale) / 2
    deltas[:, :, 1:2, :, :] = \
        ((y - ty) * box_learning_scale * (1 - y) * y).as_ndarray() * 10
    exp_w = np.exp(w)
    w_loss = np.sum((tw - exp_w)**2 * box_learning_scale) / 2
    deltas[:, :, 2:3, :, :] = (exp_w - tw) * box_learning_scale * exp_w
    exp_h = np.exp(h)
    h_loss = np.sum((th - exp_h)**2 * box_learning_scale) / 2
    deltas[:, :, 3:4, :, :] = (exp_h - th) * box_learning_scale * exp_h
    c_loss = np.sum((tconf - conf)**2 * conf_learning_scale) / 2
    deltas[:, :, 4:5, :, :] = \
        ((conf - tconf) * conf_learning_scale * (1 - conf) * conf).as_ndarray()
    p_loss = np.sum((tprob - prob)**2) / 2
    deltas[:, :, 5:, :, :] = \
        ((prob - tprob) * (1 - prob) * prob).as_ndarray().transpose(0, 2, 1, 3, 4) * 10

    if np.isnan(p_loss):
        p_loss = 0
    print(
        "x_loss: %f  y_loss: %f  w_loss: %f  h_loss: %f  c_loss: %f   p_loss: %f"
        % (x_loss, y_loss, w_loss, h_loss, c_loss, p_loss))
    loss = x_loss + y_loss + w_loss + h_loss + c_loss + p_loss

    ret = cls._create_node(loss)
    ret.attrs._output = output
    ret.attrs._deltas = deltas.reshape(batch_size, bbox * (classes + 5), grid_h, grid_w)
    return ret
def get_bbox(self, z, score_threshold=0.3, nms_threshold=0.4, keep_top_k=200):
    """Convert the network output into per-image lists of detections.

    Example:
        >>> z = model(x)
        >>> model.get_bbox(z)
        [[{'box': [0.21, 0.44, 0.11, 0.32], 'score':0.823, 'class':1}],
         [{'box': [0.87, 0.38, 0.84, 0.22], 'score':0.423, 'class':0}]]

    Args:
        z (ndarray): Output array of the neural network. Along axis 1 the
            first 4 entries are the box locations, the last 8 entries are
            the prior boxes (4) followed by the variances (4), and the
            entries in between are the class scores. ``z`` is not modified.
        score_threshold (float): The threshold for the confidence score.
            Predicted boxes which have a lower confidence score than the
            threshold are discarded. Defaults to 0.3
        nms_threshold (float): The threshold for non maximum suppression.
            Defaults to 0.4
        keep_top_k (int): Maximum number of detections kept per image after
            sorting by score. Defaults to 200

    Return:
        (list): List of predicted bbox, score and class of each image.
        The format of the return value is below. Box coordinates and size
        will be returned as ratio to the original image size. Therefore the
        range of 'box' is [0 ~ 1]. Every entry also carries a 'name' key
        taken from ``self.class_map``.

        [
            [ # Prediction of first image.
                {'box': [x, y, w, h], 'score':(float), 'class':(int)},
                {'box': [x, y, w, h], 'score':(float), 'class':(int)},
                ...
            ],
            [ # Prediction of second image.
                {'box': [x, y, w, h], 'score':(float), 'class':(int)},
                {'box': [x, y, w, h], 'score':(float), 'class':(int)},
                ...
            ],
            ...
        ]

    Note:
        Box coordinate and size will be returned as ratio to the original
        image size. Therefore the range of 'box' is [0 ~ 1].
    """
    if hasattr(z, 'as_ndarray'):
        z = z.as_ndarray()
    # Work on a copy: the softmax below is written back into the array and
    # must not be applied a second time if the caller reuses ``z``.
    z = np.array(z)
    z[:, 4:-8, :] = rm.softmax(z[:, 4:-8, :]).as_ndarray()
    z = z.transpose((0, 2, 1))
    mbox_loc = z[:, :, :4]
    variances = z[:, :, -4:]
    mbox_priorbox = z[:, :, -8:-4]
    mbox_conf = z[:, :, 4:-8]

    results = []
    for i in range(len(mbox_loc)):
        image_results = []
        decoded_bbox = self.decode_boxes(mbox_loc[i], mbox_priorbox[i], variances[i])
        # Class 0 is the background and is skipped.
        for c in range(1, self.num_class):
            c_confs = mbox_conf[i, :, c]
            c_confs_m = c_confs > score_threshold
            if not c_confs_m.any():
                continue
            boxes_to_process = decoded_bbox[c_confs_m]
            confs_to_process = c_confs[c_confs_m]
            idx = self.nms(boxes_to_process, confs_to_process, nms_threshold)
            good_boxes = boxes_to_process[idx]
            confs = confs_to_process[idx]
            for box, score in zip(good_boxes, confs):
                image_results.append({
                    "class": c - 1,
                    "score": float(score),
                    "box": box,
                    'name': self.class_map[c - 1]
                })
        # Highest score first, then keep at most ``keep_top_k`` detections.
        image_results.sort(key=lambda obj: obj['score'], reverse=True)
        results.append(image_results[:keep_top_k])
    return results