def _forward(self, X, y=None, compute_loss_grad=True, state='training'):
    """
    Make a forward pass. Note that if y is None, then the loss score and
    the loss gradients will be None in the output tuple.

    :param X: numpy.array, input data
    :param y: numpy.array, input labels. Default None. If None, the loss
        and the loss gradient won't be computed.
    :param compute_loss_grad: bool, indicates whether to compute loss derivatives
    :return: tuple, (loss_score, predictions, loss_grads). If y is None,
        only predictions are not None.
    """
    for layer in self.layers:
        X = layer.forward(layer_input=X, state=state, seed=self.seed)
    if y is None:
        if self.loss and self.loss.name == "CrossEntropy":
            # return class probabilities, keeping the documented tuple shape
            return None, softmax(X), None
        return None, X, None
    loss, preds, dX = self.loss.build(X, y, compute_derivative=compute_loss_grad)
    return loss, preds, dX
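# Several snippets in this section call a shared softmax() helper that is not
# shown here. A minimal, numerically stable sketch of what such a helper might
# look like (an assumption; the project's actual utility may accept extra
# keywords such as a temperature or return_separately):
import numpy as np

def softmax(x, axis=-1):
    # subtract the per-row max before exponentiating for numerical stability
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=axis, keepdims=True)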
def head_pose_postprocess(preds_hp, theta):
    """
    Postprocesses the raw head pose predictions (scores for yaw, pitch,
    roll) and returns the head poses (roll, yaw, pitch) in radians.

    Parameters
    ----------
    preds_hp : NumPy array
        Raw head pose predictions.
    theta : NumPy array
        Rotation angle(s) in radians of the cropping bounding boxes.

    Returns
    -------
    head_pose : NumPy array
        Roll (left+), yaw (right+), pitch (down+) in radians in the input
        image coordinates (of the head pose network).
    head_pose_orig : NumPy array
        Same as head_pose, but with the crop rotation theta added back to
        the roll, i.e. in the original image coordinates.
    """
    head_pose = np.empty((len(preds_hp[0]), 3), dtype=np.float32)
    for i_new, i in enumerate([2, 0, 1]):
        score = preds_hp[i]
        pred = softmax(score)
        # expected bin index over 66 bins, mapped to degrees
        # (3 deg per bin, offset so that bin 33 corresponds to 0 deg)
        tmp = (pred * np.arange(66)[np.newaxis]).sum(axis=1)
        head_pose[:, i_new] = tmp * 3 - 99
    # At this point, we have roll left+, yaw right+, pitch up+ in degrees
    head_pose *= np.pi / 180
    head_pose[:, 2] *= -1  # pitch down+
    head_pose_orig = head_pose.copy()
    head_pose_orig[:, 0] += theta
    return head_pose, head_pose_orig
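# A quick worked check of the 66-bin decoding above: with all probability mass
# on bin 33 the expected index is 33, and 33 * 3 - 99 = 0 degrees; bin 0
# decodes to -99 degrees and bin 65 to +96 degrees.
import numpy as np

one_hot = np.zeros(66)
one_hot[33] = 1.0
angle_deg = (one_hot * np.arange(66)).sum() * 3 - 99
assert angle_deg == 0.0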
def get_policy(action_values, strategy, k):
    if strategy == 'pow':
        x = softmax_pow(action_values, k)
    elif strategy == 'exp':
        x = softmax(action_values, k)
    else:
        raise ValueError('unknown strategy: {}'.format(strategy))
    # snap near-zero and near-one probabilities to exact values
    x[x < 1e-6] = 0
    x[x > 1 - 1e-6] = 1
    return x
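# softmax_pow() is not defined in this section. Given the name and the 'pow'
# strategy, a plausible (hypothetical) reading is power-law normalization:
import numpy as np

def softmax_pow(values, k):
    # hypothetical: v_i**k / sum_j v_j**k; assumes non-negative inputs,
    # k > 1 sharpens the distribution, 0 < k < 1 flattens it
    powered = np.asarray(values, dtype=float) ** k
    return powered / powered.sum()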
def predict(net, img):
    img = preprocess(img)

    # feedforward
    output = net.predict([img])
    logits = output[0]

    pred = softmax(logits, axis=1)

    return pred
def predict(net, img):
    img = preprocess(img)

    # feedforward
    output = net.predict([img])
    output = output[0]

    prob = softmax(output)

    return prob[0]
def recognize_from_frame(net, detector, frame):
    spoof_thresh = args.spoof_thresh

    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 - detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)

    # crop, preprocess
    images = []
    detections = []
    for obj in new_detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        img = preprocess(crop_img)
        images.append(img)
        detections.append(
            (top_left[0], top_left[1], bottom_right[0], bottom_right[1]))

    if not images:
        return frame

    images = np.concatenate(images)

    # feedforward
    output = net.predict([images])
    logits = output[0]
    preds = softmax(logits, axis=1)

    frame = draw_detections(frame, detections, preds, spoof_thresh)

    return frame
def build(self, layer_input, true_labels, compute_derivative=True):
    batch_size = layer_input.shape[0]

    probs_unnorm, Z, shifted_input = softmax(layer_input, return_separately=True)
    # log-softmax: shifted logits minus the log of the partition function
    log_probs = shifted_input - np.log(Z)

    loss = -np.sum(log_probs[range(batch_size), true_labels.flatten()]) / float(batch_size)
    loss_derivative = None

    predicted_labels = np.argmax(log_probs, axis=1).reshape(log_probs.shape[0], 1)

    if compute_derivative:
        # dL/dlogits = softmax(logits) - one_hot(labels), averaged over the batch
        loss_derivative = probs_unnorm / Z
        loss_derivative[range(batch_size), true_labels.flatten()] -= 1.
        loss_derivative /= float(batch_size)

    return loss, predicted_labels, loss_derivative
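# The derivative above uses the standard identity for softmax followed by
# cross-entropy: dL/dlogits = softmax(logits) - one_hot(labels). A
# self-contained finite-difference check of that identity:
import numpy as np

logits = np.array([[2.0, 1.0, 0.1]])
label = 0

shifted = logits - logits.max(axis=1, keepdims=True)
probs = np.exp(shifted) / np.exp(shifted).sum(axis=1, keepdims=True)
loss = -np.log(probs[0, label])

grad = probs.copy()
grad[0, label] -= 1.0  # analytic gradient

eps = 1e-6
bumped = logits.copy()
bumped[0, 0] += eps
s = bumped - bumped.max(axis=1, keepdims=True)
p = np.exp(s) / np.exp(s).sum(axis=1, keepdims=True)
numeric = (-np.log(p[0, label]) - loss) / eps
assert abs(numeric - grad[0, 0]) < 1e-4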
def get_gradients(self, inputs, expected_outputs):
    bias_gradients = [np.zeros(bias.shape) for bias in self.biases]
    weight_gradients = [np.zeros(weight.shape) for weight in self.weights]

    # forward pass
    activations = [inputs]
    activation = activations[0]
    sigmoid_z_caches = []
    for i in range(len(self.biases)):
        bias = self.biases[i]
        weight = self.weights[i]
        z = np.dot(weight, activation) + bias
        if i == len(self.biases) - 1:
            # output layer
            activation = math_utils.softmax(z)
        else:
            # hidden layers
            activation = math_utils.sigmoid(z)
        activations.append(activation)
        sigmoid_z_caches.append(z)

    # backward propagation
    # error signal at the output layer; for softmax with cross-entropy this
    # reduces to (activation - expected), which math_utils.cross_entropy is
    # expected to return here
    err = math_utils.cross_entropy(activations[-1], expected_outputs)
    bias_gradients[-1] = err
    weight_gradients[-1] = np.dot(err, activations[-2].T)

    # error signals for the hidden layers
    for i in range(len(self.layer_sizes) - 1, 1, -1):
        err = np.dot(self.weights[i - 1].T, err) * \
            math_utils.sigmoid_derivative(sigmoid_z_caches[i - 2])
        bias_gradients[i - 2] = err
        weight_gradients[i - 2] = np.dot(err, activations[i - 2].T)

    return {
        'bias_gradients': bias_gradients,
        'weight_gradients': weight_gradients
    }
def predict(net, img, text_feature):
    img = preprocess(img)

    # feedforward
    if not args.onnx:
        output = net.predict([img])
    else:
        output = net.run(None, {'image': img})

    image_feature = output[0]
    # L2-normalize the image embedding
    image_feature = image_feature / np.linalg.norm(
        image_feature, ord=2, axis=-1, keepdims=True)

    logit_scale = 100
    logits_per_image = (image_feature * logit_scale).dot(text_feature.T)

    pred = softmax(logits_per_image, axis=1)

    return pred[0]
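# The dot product above only yields cosine similarities if text_feature is
# L2-normalized like the image feature. A hedged sketch of the text side
# (the function name and inputs here are illustrative, not from this code):
import numpy as np

def normalize_text_features(text_embeddings):
    # text_embeddings: (num_prompts, dim) raw outputs of a text encoder
    norms = np.linalg.norm(text_embeddings, ord=2, axis=-1, keepdims=True)
    return text_embeddings / norms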
def decode_response(cls_logits, center_logits, reg_logits, locations, boxes,
                    use_centerness=True, sigma=0.4):
    cls_logits = softmax(cls_logits, axis=1)
    cls_logits = cls_logits[:, 1:2, :, :]
    if use_centerness:
        centerness = sigmoid(center_logits)
        obj_confidence = cls_logits * centerness
    else:
        obj_confidence = cls_logits

    num_track_objects = obj_confidence.shape[0]
    obj_confidence = obj_confidence.reshape((num_track_objects, -1))
    tlbr = reg_logits.reshape((num_track_objects, 4, -1))

    scale_penalty = _get_scale_penalty(tlbr, boxes)
    cos_window = _get_cosine_window_penalty(tlbr)
    p_obj_confidence = (obj_confidence * scale_penalty) * (1 - sigma) \
        + sigma * cos_window

    idxs = np.argmax(p_obj_confidence, axis=1)
    target_ids = np.arange(num_track_objects)

    bb_c = locations[target_ids, idxs, :]
    shift_tlbr = tlbr[target_ids, :, idxs]

    bb_tl_x = bb_c[:, 0:1] - shift_tlbr[:, 0:1]
    bb_tl_y = bb_c[:, 1:2] - shift_tlbr[:, 1:2]
    bb_br_x = bb_c[:, 0:1] + shift_tlbr[:, 2:3]
    bb_br_y = bb_c[:, 1:2] + shift_tlbr[:, 3:4]
    bb = np.concatenate((bb_tl_x, bb_tl_y, bb_br_x, bb_br_y), axis=1)

    cls_logits = cls_logits.reshape((num_track_objects, -1))
    bb_conf = cls_logits[target_ids, idxs]

    return bb, bb_conf
def post_processing(class_logits, box_regression, bbox, ids=None, labels=None):
    prob = softmax(class_logits, -1)
    proposals = box_decode(
        box_regression, bbox,
        weights=(10.0, 10.0, 5.0, 5.0)
    )
    num_classes = prob.shape[1]

    # default id is -1
    ids = ids if ids is not None else np.zeros(len(bbox), dtype=int) - 1

    # this only happens for tracks
    if labels is not None and 0 < len(labels):
        # tracks
        track_inds = np.nonzero(ids >= 0)[0]

        # keep track boxes from being suppressed during NMS
        if 0 < len(track_inds):
            prob_cp = np.array(prob)
            prob[track_inds, :] = 0.
            prob[track_inds, labels] = prob_cp[track_inds, labels] + 1.

    boxes = BBox(
        bbox=proposals.reshape(-1, 4),
        scores=prob.reshape(-1),
        ids=ids
    )
    boxes.bbox[:, 0] = boxes.bbox[:, 0].clip(0, max=IMAGE_WIDTH - 1)
    boxes.bbox[:, 1] = boxes.bbox[:, 1].clip(0, max=IMAGE_HEIGHT - 1)
    boxes.bbox[:, 2] = boxes.bbox[:, 2].clip(0, max=IMAGE_WIDTH - 1)
    boxes.bbox[:, 3] = boxes.bbox[:, 3].clip(0, max=IMAGE_HEIGHT - 1)

    boxes = filter_results(boxes, num_classes)

    return boxes
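# The "+ 1." above works because softmax scores lie in [0, 1]: adding 1 to a
# track's score guarantees it outranks every ordinary detection during NMS
# while preserving the relative order among tracks. Minimal illustration:
import numpy as np

det_scores = np.array([0.95, 0.60])          # ordinary detections, always <= 1
track_scores = np.array([0.10, 0.30]) + 1.0  # boosted tracks: 1.10, 1.30
order = np.argsort(np.concatenate([det_scores, track_scores]))[::-1]
# tracks come first in suppression order regardless of their raw scores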
def get_policy(self, observations):
    policies = [[] for _ in range(self.city.N)]
    for road in self.city.roads:
        policy = np.zeros(len(road.reachable_roads))
        for i, road_index in enumerate(road.reachable_roads):
            v = observations[road_index][1]
            if not self.order_proportional:
                v = max(v - observations[road_index][0], 0)
            policy[i] = v
        if policy.sum() == 0:
            policy.fill(1)
        if self.strategy == 0:
            # greedy: uniform over the argmax entries
            policy = np.where(policy == np.amax(policy), 1.0, 0.0)
            policy /= policy.sum()
        elif self.strategy == 1:
            policy /= policy.sum()
            if self.policy_pow != 1:
                policy = softmax_pow(policy, self.policy_pow)
        else:
            policy = softmax(policy, self.policy_pow)
        policies[road.uuid] = policy
    return policies
def forward(self, layer_input, *args, **kwargs):
    self.current_layer_input = layer_input
    self.current_layer_output = softmax(layer_input)
    return self.current_layer_output
def postprocess(self, scores, raw_boxes, ResizeM, raw_shape):
    # generate centers
    decode_boxes = []
    select_scores = []
    for stride, box_distribute, score in zip(self.strides, raw_boxes, scores):
        # centers
        fm_h = self.input_shape[0] / stride
        fm_w = self.input_shape[1] / stride
        h_range = np.arange(fm_h)
        w_range = np.arange(fm_w)
        ww, hh = np.meshgrid(w_range, h_range)
        ct_row = (hh.flatten() + 0.5) * stride
        ct_col = (ww.flatten() + 0.5) * stride
        center = np.stack((ct_col, ct_row, ct_col, ct_row), axis=1)

        # box distribution to distance
        reg_range = np.arange(self.reg_max + 1)
        box_distance = box_distribute.reshape((-1, self.reg_max + 1))
        box_distance = softmax(box_distance, axis=1)
        box_distance = box_distance * np.expand_dims(reg_range, axis=0)
        box_distance = np.sum(box_distance, axis=1).reshape((-1, 4))
        box_distance = box_distance * stride

        # top K candidates
        topk_idx = np.argsort(score.max(axis=1))[::-1]
        topk_idx = topk_idx[:self.num_candidate]
        center = center[topk_idx]
        score = score[topk_idx]
        box_distance = box_distance[topk_idx]

        # decode box
        decode_box = center + [-1, -1, 1, 1] * box_distance

        select_scores.append(score)
        decode_boxes.append(decode_box)

    # nms
    bboxes = np.concatenate(decode_boxes, axis=0)
    confidences = np.concatenate(select_scores, axis=0)
    picked_box_probs = []
    picked_labels = []
    for class_index in range(0, confidences.shape[1]):
        probs = confidences[:, class_index]
        mask = probs > self.prob_threshold
        probs = probs[mask]
        if probs.shape[0] == 0:
            continue
        subset_boxes = bboxes[mask, :]
        box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
        box_probs = hard_nms(
            box_probs,
            iou_threshold=self.iou_threshold,
            top_k=self.top_k,
        )
        picked_box_probs.append(box_probs)
        picked_labels.extend([class_index] * box_probs.shape[0])

    if not picked_box_probs:
        return np.array([]), np.array([]), np.array([])

    picked_box_probs = np.concatenate(picked_box_probs)

    # resize output boxes
    picked_box_probs[:, :4] = warp_boxes(
        picked_box_probs[:, :4], np.linalg.inv(ResizeM),
        raw_shape[1], raw_shape[0])

    return picked_box_probs[:, :4].astype(np.int32), \
        np.array(picked_labels), picked_box_probs[:, 4]
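# The "box distribution to distance" step above is integral regression: a
# softmax over reg_max + 1 discrete bins, then the expected bin index scaled
# by the stride. A standalone illustration (reg_max = 7 is assumed here):
import numpy as np

reg_max = 7
stride = 8
logits = np.array([0.1, 2.0, 0.5, 0.0, -1.0, -1.0, -2.0, -2.0])

probs = np.exp(logits - logits.max())
probs /= probs.sum()

distance_bins = (probs * np.arange(reg_max + 1)).sum()  # expected bin index
distance_px = distance_bins * stride                    # pixels at this stride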
def postprocess(output, arch):
    if arch == "erfnet":
        output = softmax(output, axis=1)
    elif arch == "scnn":
        output = output.transpose((0, 3, 1, 2))
    return output
def recognize_from_video(enc, dec):
    video_file = args.video if args.video else args.input[0]
    capture = webcamera_utils.get_capture(video_file)
    assert capture.isOpened(), 'Cannot capture source'

    # create video writer if savepath is specified as video format
    if args.savepath is not None:
        logger.warning(
            'currently, video results cannot be output correctly...'
        )
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    sequence_size = 16
    embeddings = []
    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        img = preprocess(frame)

        # feedforward
        output = enc.predict([img])
        embedding = output[0]
        embedding = embedding.reshape((1, -1))

        # keep a sliding window of the most recent frame embeddings
        embeddings.append(embedding)
        embeddings = embeddings[-sequence_size:]

        if len(embeddings) == sequence_size:
            decoder_input = np.concatenate(embeddings, axis=0)
            decoder_input = np.expand_dims(decoder_input, axis=0)
            output = dec.predict([decoder_input])
            logits = output[0]

            probs = softmax(logits)
            probs = probs[0]
            i = np.argmax(probs)
            display_text = '{} - {:.2f}%'.format(LABELS[i], probs[i] * 100)
        else:
            display_text = 'Preparing...'

        frame = render_frame(frame, display_text)
        cv2.imshow('frame', frame)

        # save results
        if writer is not None:
            writer.write(frame)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()

    logger.info('Script finished successfully.')