def bbox_transform(self, bbox):
    """Turn ground-truth boxes into per-prior localisation / class targets.

    Args:
        bbox: ``None``, an empty sequence, or a 2-D ``np.ndarray`` of boxes in
            pixel units. When the last dimension is odd, the final column is
            split off as the label and the rest are coordinates. Coordinates
            at even positions are divided by the image width and those at odd
            positions by the image height.

    Returns:
        tuple: ``(loc, conf)`` as ``float32`` / ``int64`` NumPy arrays. When
        there is nothing to match (``None``, empty, or not an ndarray) the
        result is all zeros with shapes ``(num_priors, 4)`` and
        ``(num_priors,)``.

    The caller's ``bbox`` array is never modified: normalisation is done on a
    floating-point copy.
    """
    num_priors = self.priors.shape[0]

    def _empty_targets():
        return (np.zeros((num_priors, 4)).astype(np.float32),
                np.zeros(num_priors).astype(np.int64))

    if bbox is None or len(bbox) == 0:
        return _empty_targets()

    if isinstance(bbox, np.ndarray):
        # self.image_size is unpacked as (height, width).
        height, width = self.image_size
        gt_label = None
        gt_box = bbox
        if bbox.shape[-1] % 2 == 1:
            gt_box = bbox[:, :-1]
            gt_label = bbox[:, -1]

        # ``gt_box`` is still a view of the caller's array here. Dividing it
        # in place would silently re-normalise the caller's data on every call
        # (and raises for integer arrays), so work on a floating copy instead.
        if np.issubdtype(gt_box.dtype, np.floating):
            gt_box = gt_box.astype(gt_box.dtype)
        else:
            gt_box = gt_box.astype(np.float32)
        gt_box[:, 0::2] /= width
        gt_box[:, 1::2] /= height

        # match priors (default boxes) and ground truth boxes
        if gt_box is not None and len(gt_box) > 0:
            truths = to_tensor(gt_box).float()
            # NOTE(review): gt_label is None when bbox has an even number of
            # columns; confirm to_tensor(None) is acceptable or that such
            # input never reaches this point.
            labels = to_tensor(gt_label).long()
            loc_t, conf_t = match(truths, self.priors.data, (0.1, 0.2),
                                  labels, self.gt_overlap_tolerance)
            return (to_numpy(loc_t).astype(np.float32),
                    to_numpy(conf_t).astype(np.int64))

    return _empty_targets()
def get_enumerators(self, *args, negative_case=None, n=10, exclude_samples=True):
    """Return the words closest to all inputs and farthest from the negatives.

    Each positive example contributes its similarity against ``self.weight``;
    each negative example's similarity is subtracted. The highest-scoring
    vocabulary entries are returned.

    Args:
        *args: positive examples, each passed to ``self.get_words_vector``.
        negative_case: optional ``str`` or ``list``/``tuple`` of negative
            examples. Any other type is ignored.
        n: maximum number of words to return.
        exclude_samples: when True, positive examples given as strings are
            left out of the result, so a query word is not reported as its
            own nearest neighbour.

    Returns:
        OrderedDict: ``word -> score`` in descending score order.
    """
    positive_correlate = 0
    negative_correlate = 0
    exclude_list = []
    for arg in args:
        positive_correlate += element_cosine_distance(
            self.get_words_vector(arg), self.weight)[0]
        # Previously ``exclude_samples`` was accepted but never used, so the
        # exclusion list stayed empty and query words topped their own result.
        if exclude_samples and isinstance(arg, str):
            exclude_list.append(self.word2idx(arg))

    correlate = positive_correlate
    if negative_case is not None:
        if isinstance(negative_case, str):
            negative_case = [negative_case]
        if isinstance(negative_case, (list, tuple)):
            for arg in negative_case:
                negative_correlate += element_cosine_distance(
                    self.get_words_vector(arg), self.weight)[0]
            correlate = positive_correlate - negative_correlate

    sorted_idxes = argsort(correlate, descending=True)
    # Over-fetch by the number of excluded entries so n results remain.
    sorted_idxes = sorted_idxes[:n + len(exclude_list)]
    sorted_idxes = to_tensor([
        idx for idx in sorted_idxes if idx.item() not in exclude_list
    ]).long()
    probs = to_list(correlate[sorted_idxes])[:n]
    words = [self.idx2word(idx.item()) for idx in sorted_idxes][:n]
    return OrderedDict(zip(words, probs))
def find_similar(self, reprt: (str, Tensor), n: int = 10, ignore_indexes=None):
    """Find the vocabulary words nearest to a word or a vector.

    Args:
        reprt: a word (looked up through ``self.word2idx`` and
            ``self.weight``) or a tensor compared directly against
            ``self.weight``.
        n: maximum number of words to return.
        ignore_indexes: optional iterable of vocabulary indices to leave out
            of the result. It is copied, never modified. When ``reprt`` is a
            word, its own index is ignored as well.

    Returns:
        OrderedDict: ``word -> score`` in descending score order.

    Raises:
        ValueError: if ``reprt`` is neither a tensor nor a word contained in
            ``self._vocabs``.
    """
    # Copy so the query word's index is not appended to the caller's list
    # (which would leak across calls); this also accepts tuples and sets.
    ignore_indexes = [] if ignore_indexes is None else list(ignore_indexes)

    if isinstance(reprt, str):
        reprt_idx = self.word2idx(reprt)
        ignore_indexes.append(reprt_idx)
        reprt = self.weight[reprt_idx].expand_dims(
            0) if reprt in self._vocabs else None

    if not is_tensor(reprt):
        raise ValueError('Valid reprt should be a word or a tensor .')

    correlate = element_cosine_distance(reprt, self.weight)[0]
    sorted_idxes = argsort(correlate, descending=True)
    # Over-fetch by the number of ignored entries so n results remain.
    sorted_idxes = sorted_idxes[:n + len(ignore_indexes)]
    sorted_idxes = to_tensor([
        idx for idx in sorted_idxes if idx.item() not in ignore_indexes
    ]).long()
    probs = to_list(correlate[sorted_idxes])[:n]
    words = [self.idx2word(idx.item()) for idx in sorted_idxes][:n]
    return OrderedDict(zip(words, probs))
def forward(self, confidence, locations, target_confidence, target_locations):
    """Compute a GIoU loss between predicted and target positive boxes.

    A prior counts as a predicted positive only when all three hold: its
    target label is > 0, its highest softmax probability is > 0.5, and its
    arg-max class is > 0. Predicted and target offsets are decoded against
    ``self.priors`` before the boxes are compared.

    Args:
        confidence (batch_size, num_priors, num_classes): class predictions.
        locations (batch_size, num_priors, 4): predicted locations.
        target_confidence (batch_size, num_priors): real labels of all the priors.
        target_locations (batch_size, num_priors, 4): real boxes corresponding all the priors.

    Returns:
        The accumulated ``1 - GIoU`` term divided by the number of positive
        target boxes in the batch. An image that has targets but no predicted
        positive adds a flat penalty of 1. When the batch has no positive
        target at all the result is ``0.0`` (this used to raise
        ``ZeroDivisionError``).
    """
    num_batch = confidence.size(0)

    confidence_logit = softmax(confidence, -1)
    confidence_logit_probs, confidence_logit_idxs = confidence_logit.max(-1)
    probs_mask = confidence_logit_probs > 0.5
    label_mask = confidence_logit_idxs > 0
    pos_target_mask_all = target_confidence > 0
    # All three conditions must hold, hence the sum of the masks equals 3.
    pos_infer_mask_all = (pos_target_mask_all.float() + probs_mask.float() +
                          label_mask.float() == 3)

    variances = (self.center_variance, self.size_variance)
    decode_locations_all = decode(locations, self.priors, variances)
    decode_target_locations_all = decode(target_locations, self.priors,
                                         variances)

    # The cross-IoU "overlaps" term the previous version accumulated was
    # never returned, so it is no longer computed.
    giou = 0.0
    num_boxes = 0
    for i in range(num_batch):
        decode_locations = decode_locations_all[i][pos_infer_mask_all[i], :]
        decode_target_locations = decode_target_locations_all[i][
            pos_target_mask_all[i], :]
        num_preds = decode_locations.shape[0]
        num_targets = decode_target_locations.shape[0]
        num_boxes += num_targets

        if num_targets > 0 and num_preds > 0:
            giou = giou + (1 - (bbox_giou(
                decode_locations, decode_target_locations).sum(0) /
                                num_targets)).sum()
        elif num_targets > 0 or num_preds > 0:
            # Exactly one side is empty: flat penalty for this image.
            giou = giou + 1

    # Guard the normaliser: with no positive target in the whole batch the
    # accumulator is still 0.0 and dividing by zero would raise.
    return giou / max(num_boxes, 1)
def __init__(self, priors, center_variance, size_variance):
    """Store the prior boxes and the two box-coding variances.

    Args:
        priors: prior boxes; converted with ``to_tensor`` and kept as
            ``self.priors``.
        center_variance: kept unchanged as ``self.center_variance``.
        size_variance: kept unchanged as ``self.size_variance``.
    """
    super(IoULoss, self).__init__()
    self.center_variance, self.size_variance = center_variance, size_variance
    self.priors = to_tensor(priors)
def rerec(self, bboxA, img_shape):
    """Expand each box to a square around its centre.

    Columns 0..3 of ``bboxA`` are read as ``x1, y1, x2, y2``. The longer side
    becomes the side of the square and the shorter axis is padded equally on
    both ends. Remaining columns are left as they are. ``img_shape`` is
    accepted but not used.

    Returns:
        The squared boxes, converted back with ``to_tensor``.
    """
    bboxA = to_numpy(bboxA)
    height = bboxA[:, 3] - bboxA[:, 1]
    width = bboxA[:, 2] - bboxA[:, 0]
    side = np.maximum(width, height)

    # Shift the top-left corner by half of the padding needed on each axis.
    padding = np.stack([side - width, side - height], axis=1)
    bboxA[:, :2] = bboxA[:, :2] - 0.5 * padding
    # Bottom-right corner = updated top-left corner + square side.
    bboxA[:, 2:4] = bboxA[:, :2] + side[:, None]
    return to_tensor(bboxA)
def build_discriminator():
    """Assemble the discriminator as a ``Sequential`` of convolutional stages.

    All settings (``activation``, ``use_spectral``, ``discriminator_norm``,
    ``image_width``, ``discriminator_build_block``, ``use_self_attention``,
    ``use_dropout``, ``use_minibatch_discrimination``) are read from the
    enclosing scope; the function takes no arguments.

    Returns:
        The ``Sequential`` named ``'discriminator'``. When ``use_spectral`` is
        set, a new ``Sequential`` is returned in which every ``Dense`` layer
        is wrapped with ``torch.nn.utils.spectral_norm``.
    """
    layers = []
    layers.append(
        Conv2d((5, 5), 32, strides=1, auto_pad=True, use_bias=False, activation=activation, name='first_layer'))
    layers.append(Conv2d_Block((3, 3), 64, strides=2, auto_pad=True, use_spectral=use_spectral, use_bias=False, activation=activation, normalization=discriminator_norm, name='second_layer'))
    filter = 64
    # 'second_layer' uses strides=2, so tracking starts at half the input width.
    current_width = image_width // 2
    i = 0
    # Add one stride-2 stage per iteration until the tracked width is <= 8.
    while current_width > 8:
        # The filter count doubles on every odd-numbered stage only.
        filter = filter * 2 if i % 2 == 1 else filter
        if discriminator_build_block == BuildBlockMode.base.value:
            layers.append(
                Conv2d_Block((3, 3), num_filters=filter, strides=2, auto_pad=True, use_spectral=use_spectral, use_bias=False, activation=activation, normalization=discriminator_norm, name='base_block{0}'.format(i)))
        elif discriminator_build_block == BuildBlockMode.resnet.value:
            # resnet_block's result is spliced in with extend(), i.e. it is
            # treated as an iterable of layers rather than a single layer.
            layers.extend(resnet_block(num_filters=filter, strides=2, activation=activation, use_spectral=use_spectral, normalization=discriminator_norm, name='resnet_block{0}'.format(i)))
        elif discriminator_build_block == BuildBlockMode.bottleneck.value:
            layers.append(
                bottleneck_block(num_filters=filter, strides=2, reduce=2, activation=activation, use_spectral=use_spectral, normalization=discriminator_norm, name='bottleneck_block{0}'.format(i)))
        current_width = current_width // 2
        i = i + 1
    # insert(-2, ...) places the attention layer before the last two layers
    # collected so far; insert(-1, ...) places dropout before the last one.
    if use_self_attention:
        layers.insert(-2, SelfAttention(8, name='self_attention'))
    if use_dropout:
        layers.insert(-1, Dropout(0.5))
    layers.append(Conv2d_Block((3, 3), 128, strides=2, auto_pad=True, use_bias=False, activation='leaky_relu', use_spectral=use_spectral, normalization=discriminator_norm, name='last_conv'))
    # NOTE(review): the trailing comma makes this statement a one-element
    # tuple expression; the append still runs, the comma has no effect.
    layers.append(Flatten()),
    if use_minibatch_discrimination:
        layers.append(MinibatchDiscriminationLayer(name='minibatch_dis'))
    layers.append(Dense(1, use_bias=False, name='fc'))
    layers.append(Sigmoid())
    dis = Sequential(layers, name='discriminator')
    # One forward pass on a dummy input; ``out`` itself is not used.
    # NOTE(review): presumably this builds lazily-shaped layers before Dense
    # is wrapped with spectral norm below — confirm.
    out = dis(to_tensor(TensorShape([None, 3, image_width, image_width]).get_dummy_tensor()).to(get_device()))
    if use_spectral:
        new_layers = []
        for layer in dis:
            if isinstance(layer, Dense):
                new_layers.append(torch.nn.utils.spectral_norm(layer))
            else:
                new_layers.append(layer)
        return Sequential(new_layers, name='discriminator')
    else:
        return dis
def generate_priors(feature_map_list, shrinkage_list, image_size, min_boxes, clamp=True) -> torch.Tensor:
    """Enumerate prior boxes as ``[x_center, y_center, w, h]`` rows.

    Args:
        feature_map_list: ``[widths, heights]``; entry ``k`` of each list is
            the number of grid columns / rows at level ``k``.
        shrinkage_list: ``[x_shrinkages, y_shrinkages]`` per level;
            ``image_size / shrinkage`` is the divisor applied to cell centres.
        image_size: indexed as ``image_size[0]`` for the x axis and
            ``image_size[1]`` for the y axis.
        min_boxes: per level, the box sizes to place at every grid cell; each
            is divided by the image size to obtain ``w`` and ``h``.
        clamp: when True, clamp every value into ``[0, 1]`` in place.

    Returns:
        Tensor of priors moved to ``get_device()``. Rows are ordered by level,
        then grid row, then grid column, then box size.
    """
    boxes = []
    for level in range(len(feature_map_list[0])):
        scale_w = image_size[0] / shrinkage_list[0][level]
        scale_h = image_size[1] / shrinkage_list[1][level]
        rows = feature_map_list[1][level]
        cols = feature_map_list[0][level]
        boxes.extend(
            [(col + 0.5) / scale_w,
             (row + 0.5) / scale_h,
             size / image_size[0],
             size / image_size[1]]
            for row in range(rows)
            for col in range(cols)
            for size in min_boxes[level]
        )

    print("priors nums:{}".format(len(boxes)))
    priors = to_tensor(boxes).to(get_device())
    if clamp:
        priors.clamp_(0.0, 1.0)
    return priors
def boxes_nms(self, box_scores, overlap_threshold=0.5, top_k=-1):
    """Greedy non-maximum suppression.

    Arguments:
        box_scores: array-like whose rows start with
            (xmin, ymin, xmax, ymax, score); any further columns are carried
            along untouched.
        overlap_threshold: a candidate is dropped when its IoU with an
            already selected box is above this value.
        top_k: stop after this many boxes have been selected; values <= 0
            mean no limit.
    Returns:
        The selected rows of ``box_scores`` (not indices), highest score
        first, or an empty list when there are no candidates.
    """
    box_scores = to_tensor(box_scores)
    # Nothing to suppress: return an empty list.
    if len(box_scores) == 0:
        return []

    scores = box_scores[:, 4]
    coords = box_scores[:, :4]

    # Row indices of the boxes that survive.
    selected = []
    # Ascending sort by score, so the best remaining candidate is always last.
    remaining = argsort(scores, descending=False)
    while len(remaining) > 0:
        best = remaining[-1]
        selected.append(best.item())

        reached_top_k = top_k > 0 and len(selected) == top_k
        if reached_top_k or len(remaining) == 1:
            break

        # Everything except the last entry is still a candidate.
        remaining = remaining[:-1]
        # IoU between every remaining candidate and the box just selected.
        overlaps = self.iou_of(
            coords[remaining, :],
            expand_dims(coords[best, :], axis=0),
        )
        # Keep only candidates that do not overlap the selected box too much.
        remaining = remaining[overlaps <= overlap_threshold]

    return box_scores[selected]
def forward(self, confidence, locations, target_confidence, target_locations):
    """Compute the weighted classification loss plus the box regression loss.

    Args:
        confidence (batch_size, num_priors, num_classes): class predictions.
        locations (batch_size, num_priors, 4): predicted locations.
        target_confidence (batch_size, num_priors): real labels of all the priors.
        target_locations (batch_size, num_priors, 4): real boxes corresponding all the priors.

    Returns:
        ``(regression_loss + classification_loss) / num_pos`` where
        ``num_pos`` is the number of priors whose target label is > 0. When
        there is no positive prior the divisor is 1, so the result stays
        finite instead of becoming NaN/inf.

    The classification term is a summed cross-entropy over the priors picked
    by ``hard_negative_mining``. The regression term is a summed MSE on the
    four offsets plus a summed L1 on ``exp`` of offsets 2:4.
    """
    num_classes = confidence.size(2)

    with torch.no_grad():
        # derived from cross_entropy=sum(log(p))
        # Per-prior loss of the class-0 prediction, used only for mining.
        loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
        mask = hard_negative_mining(loss, target_confidence,
                                    self.neg_pos_ratio)

    # Five fixed per-class weights; F.cross_entropy requires one weight per
    # class, so this assumes num_classes == 5.
    weight = to_tensor(np.array([0.05, 1, 5, 20, 10]))
    classification_loss = F.cross_entropy(
        confidence[mask, :].reshape(-1, num_classes),
        target_confidence[mask],
        weight=weight,
        reduction='sum')

    pos_mask = target_confidence > 0
    locations = locations[pos_mask, :].reshape(-1, 4)
    target_locations = target_locations[pos_mask, :].reshape(-1, 4)
    # Kept under its historical name although it is MSE + L1, not smooth L1.
    smooth_l1_loss = F.mse_loss(locations, target_locations, reduction='sum')
    smooth_l1_loss += F.l1_loss(locations[:, 2:4].exp(),
                                target_locations[:, 2:4].exp(),
                                reduction='sum')

    # A batch without positives would otherwise divide by zero and produce a
    # NaN/inf loss.
    num_pos = max(target_locations.size(0), 1)
    return (smooth_l1_loss + classification_loss) / num_pos
def get_image_pyrimid(self, img, min_size=None, factor=0.709):
    """Build a multi-scale pyramid of ``img``.

    The first level is scaled by ``12 / min_size`` and every further level by
    an extra ``factor``. Levels are produced while the scaled shorter side of
    the image is at least 12.

    Args:
        img: image array; ``img.shape[0]`` and ``img.shape[1]`` are its
            height and width.
        min_size: falls back to ``self.min_size`` when ``None``.
        factor: per-level shrink factor.

    Returns:
        tuple: ``(images, scales)`` — the tensors for each level and the scale
        applied to obtain each one. Both are empty when even the first level
        would be shorter than 12.
    """
    if min_size is None:
        min_size = self.min_size

    base_scale = 12.0 / min_size
    shortest_side = np.amin([img.shape[0], img.shape[1]]) * base_scale

    # create scale pyramid
    images, scales = [], []
    level = 0
    while shortest_side >= 12:
        current_scale = base_scale * np.power(factor, level)
        scales.append(current_scale)
        scaled_img = rescale(current_scale)(img.copy())
        # Only plain functions in the preprocess flow are applied here.
        for func in self.preprocess_flow:
            if inspect.isfunction(func):
                scaled_img = func(scaled_img)
        images.append(to_tensor(image_backend_adaption(scaled_img)))
        shortest_side = shortest_side * factor
        level += 1
    return images, scales
def infer_single_image(self, img, scale=1):
    """Detect objects in one image.

    Args:
        img: input accepted by ``image2array``; a fourth channel is dropped.
        scale: accepted but not used.

    Returns:
        tuple: ``(img_orig, boxes, labels, probs)``. ``img_orig`` is a copy of
        the image taken before preprocessing. ``boxes`` are multiplied by the
        model input width/height and divided by the resize scale recorded
        during preprocessing. ``labels`` are ``int32``. The last three are
        ``None`` when nothing passes the thresholds.

        If an exception occurs during inference it is reported through
        ``PrintException()`` and the method returns ``None``.

    Raises:
        ValueError: if the model is not built yet.
    """
    if self._model.built:
        try:
            self._model.to(self.device)
            self._model.eval()
            if self._model.input_spec.object_type is None:
                self._model.input_spec.object_type = ObjectType.rgb
            img = image2array(img)
            if img.shape[-1] == 4:
                img = img[:, :, :3]
            img_orig = img.copy()

            rescale_scale = 1
            for func in self.preprocess_flow:
                if (inspect.isfunction(func) or isinstance(
                        func, Transform)) and func is not image_backend_adaption:
                    img = func(img, spec=self._model.input_spec)
                    # Remember the factor used by the resize step so boxes can
                    # be mapped back to the original image afterwards.
                    if (inspect.isfunction(func) and func.__qualname__ == 'resize.<locals>.img_op') or (
                            isinstance(func, Transform) and func.name == 'resize'):
                        rescale_scale = func.scale
                else:
                    print(func)
            img = image_backend_adaption(img)

            # Match the device and dtype of the model's first weight.
            inp = to_tensor(np.expand_dims(img, 0)).to(
                torch.device("cuda" if self._model.weights[0].data.
                             is_cuda else "cpu")).to(
                                 self._model.weights[0].data.dtype)
            confidence, boxes = self._model(inp)
            boxes = boxes[0]
            confidence = confidence[0]

            # Keep confident predictions first, then drop class 0.
            probs, label = confidence.data.max(-1)
            mask = probs > self.detection_threshold
            probs = probs[mask]
            label = label[mask]
            boxes = boxes[mask, :]
            mask = label > 0
            probs = probs[mask]
            label = label[mask]
            boxes = boxes[mask, :]

            if boxes is not None and len(boxes) > 0:
                # Columns: 0-3 box, 4 label, 5 probability.
                box_probs = concate([
                    boxes.float(),
                    label.reshape(-1, 1).float(),
                    probs.reshape(-1, 1).float()
                ], axis=1)
                if len(boxes) > 1:
                    # NOTE(review): assumes hard_nms keeps the column layout.
                    box_probs, _ = self.hard_nms(
                        box_probs,
                        nms_threshold=self.nms_threshold,
                        top_k=-1,
                    )
                boxes = box_probs[:, :4]
                boxes[:, 0::2] *= self._model.input_spec.shape.dims[-1]
                boxes[:, 1::2] *= self._model.input_spec.shape.dims[-2]
                boxes[:, :4] /= rescale_scale
                return img_orig, to_numpy(boxes), to_numpy(
                    box_probs[:, 4]).astype(np.int32), to_numpy(box_probs[:, 5])
            else:
                return img_orig, None, None, None
        except Exception:
            # Narrowed from a bare ``except:`` so KeyboardInterrupt and
            # SystemExit are no longer swallowed. Ordinary errors are still
            # reported on a best-effort basis.
            PrintException()
    else:
        raise ValueError('the model is not built yet.')
def forward(self, x, scale):
    """Run the P-Net -> R-Net -> O-Net cascade on one image at one scale.

    Args:
        x: the image. It is cropped as ``x[rows, cols, :]`` and
            ``(x.shape[0], x.shape[1])`` is passed as the image size.
        scale: factor by which P-Net box coordinates are divided.

    NOTE(review): no ``return`` statement is visible in this method; as
    written it ends after the landmark columns are appended and returns
    ``None``. Confirm whether a final NMS / return was lost.
    NOTE(review): the source indentation was lost; the R-Net / O-Net stages
    are assumed to run only when P-Net produced candidates — confirm.
    """
    # Was ``x.exand_dims(0)``, a misspelling that raised AttributeError on
    # every call.
    inp = x.expand_dims(0)
    boxes = self.pnet(inp)
    boxes_list = []
    if boxes is not None and len(boxes) > 0:
        # Map coordinates back to the unscaled image; scores stay untouched.
        box = boxes[:, :4] / scale
        score = boxes[:, 4:]
        boxes = concate([box.round_(), score], axis=1)
        if len(boxes) > 0:
            boxes_list.append(boxes)

    # ---- P-Net finished ----
    if len(boxes_list) > 0:
        boxes = to_tensor(concate(boxes_list, axis=0))
        boxes = clip_boxes_to_image(boxes, (x.shape[0], x.shape[1]))
        boxes = nms(boxes, threshold=self.detection_threshold[0])
        print('pnet:{0} boxes '.format(len(boxes)))
        if boxes is not None:
            # prepare rnet input: square crops resized to 24x24, channels first
            boxes = self.rerec(boxes, x.shape)
            new_arr = np.zeros((boxes.shape[0], 3, 24, 24))
            for k in range(boxes.shape[0]):
                box = boxes[k]
                crop_img = x.copy()[int(box[1]):int(box[3]),
                                    int(box[0]):int(box[2]), :]
                # Degenerate crops keep their all-zero placeholder.
                if crop_img.shape[0] > 0 and crop_img.shape[1] > 0:
                    new_arr[k] = Resize(
                        (24, 24))(crop_img / 255.0).transpose([2, 0, 1])
            new_arr = to_tensor(new_arr)

            r_output1_list = []
            r_output2_list = []
            r_output3_list = []
            if len(new_arr) > 16:
                # Feed R-Net in chunks of 16 crops.
                for i in range(len(new_arr) // 16 + 1):
                    if i * 16 < len(new_arr):
                        r_out1, r_out2, r_out3 = self.rnet(
                            new_arr[i * 16:(i + 1) * 16, :, :, :])
                        r_output1_list.append(r_out1)
                        r_output2_list.append(r_out2)
                        r_output3_list.append(r_out3)
                r_out1 = concate(r_output1_list, axis=0)
                r_out2 = concate(r_output2_list, axis=0)
                r_out3 = concate(r_output3_list, axis=0)
            else:
                r_out1, r_out2, r_out3 = self.rnet(new_arr)

            probs = to_numpy(r_out1)
            keep = np.where(probs[:, 0] > self.detection_threshold[1])[0]
            r_out1 = r_out1[keep]
            boxes = boxes[keep]
            # Column 4 now carries the R-Net score.
            boxes[:, 4] = r_out1[:, 0]
            r_out2 = r_out2[keep]
            boxes = calibrate_box(boxes, r_out2)

            # ---- R-Net finished ----
            boxes = nms(boxes, threshold=self.detection_threshold[1],
                        image_size=(x.shape[0], x.shape[1]),
                        min_size=self.min_size)
            print('rnet:{0} boxes '.format(len(boxes)))
            boxes = clip_boxes_to_image(boxes, (x.shape[0], x.shape[1]))
            boxes = self.rerec(boxes, x.shape)

            # prepare onet input: same cropping, resized to 48x48
            new_arr = np.zeros((boxes.shape[0], 3, 48, 48))
            for k in range(boxes.shape[0]):
                box = boxes[k]
                crop_img = x.copy()[int(box[1]):int(box[3]),
                                    int(box[0]):int(box[2]), :]
                if crop_img.shape[0] > 0 and crop_img.shape[1] > 0:
                    new_arr[k] = Resize(
                        (48, 48))(crop_img / 255.0).transpose([2, 0, 1])
            new_arr = to_tensor(new_arr)

            o_out1, o_out2, o_out3 = self.onet(new_arr)
            probs = to_numpy(o_out1)
            keep = np.where(probs[:, 0] > self.detection_threshold[2])[0]
            o_out1 = o_out1[keep]
            boxes = boxes[keep]
            # Column 4 now carries the O-Net score.
            boxes[:, 4] = o_out1[:, 0]
            o_out2 = o_out2[keep]
            o_out3 = o_out3[keep]
            boxes = calibrate_box(boxes, o_out2)

            # O-Net's third output is read as x values at even positions and
            # y values at odd positions, relative to the box; convert them to
            # image coordinates.
            landmarks_x = boxes[:, 0:1] + o_out3[:, 0::2] * (
                boxes[:, 2:3] - boxes[:, 0:1] + 1)
            landmarks_y = boxes[:, 1:2] + o_out3[:, 1::2] * (
                boxes[:, 3:4] - boxes[:, 1:2] + 1)
            boxes = concate([boxes, landmarks_x, landmarks_y], axis=-1)
def infer_single_image(self, img, **kwargs):
    """Run the P-Net -> R-Net -> O-Net cascade over an image pyramid.

    Args:
        img: input accepted by ``image2array``; a fourth channel is dropped.
        **kwargs: accepted but not used.

    Returns:
        * ``None`` when P-Net yields no candidate at any pyramid level (this
          includes an empty pyramid, which previously could hit an unbound
          ``boxes`` variable).
        * the empty box container when R-Net or O-Net rejects every
          candidate.
        * otherwise ``to_numpy`` of the final boxes, whose columns are the
          box, the O-Net score, then landmark x values and landmark y values.

    Raises:
        ValueError: if the model is not built yet.
    """
    if self.model.built:
        self.model.to(self.device)
        self.model.eval()
        img = image2array(img)
        if img.shape[-1] == 4:
            img = img[:, :, :3]

        imgs, scales = self.get_image_pyrimid(img)
        boxes_list = []
        for i in range(len(scales)):
            scaled_img = imgs[i]
            # Match the device and dtype of P-Net's first weight.
            inp = to_tensor(expand_dims(scaled_img, 0)).to(
                torch.device("cuda" if self.pnet.weights[0].data.
                             is_cuda else "cpu")).to(
                                 self.pnet.weights[0].data.dtype)
            boxes = self.pnet(inp)
            if boxes is not None and len(boxes) > 0:
                # Map coordinates back to the unscaled image.
                scale = scales[i]
                box = boxes[:, :4] / scale
                score = boxes[:, 4:]
                boxes = torch.cat([box.round_(), score], dim=1)
                if len(boxes) > 0:
                    boxes_list.append(boxes)

        # ---- P-Net finished ----
        if len(boxes_list) == 0:
            # Nothing to refine. Returning here avoids reading a stale or
            # never-assigned ``boxes`` in the stages below.
            return None

        boxes = to_tensor(torch.cat(boxes_list, dim=0))
        boxes = clip_boxes_to_image(boxes, (img.shape[0], img.shape[1]))
        boxes = self.boxes_nms(
            boxes, overlap_threshold=self.detection_threshould[0])
        if self.verbose:
            print('pnet:{0} boxes '.format(len(boxes)))

        if boxes is not None:
            # prepare rnet input: square crops resized to 24x24, channels first
            boxes = self.rerec(boxes, img.shape)
            new_arr = np.zeros((boxes.shape[0], 3, 24, 24))
            for k in range(boxes.shape[0]):
                box = boxes[k]
                crop_img = img.copy()[int(box[1]):int(box[3]),
                                      int(box[0]):int(box[2]), :]
                # Degenerate crops keep their all-zero placeholder.
                if crop_img.shape[0] > 0 and crop_img.shape[1] > 0:
                    new_arr[k] = resize((24, 24))(crop_img).transpose(
                        [2, 0, 1]) / 255.0
            new_arr = to_tensor(new_arr)

            r_output1_list = []
            r_output2_list = []
            r_output3_list = []
            if len(new_arr) > 16:
                # Feed R-Net in chunks of 16 crops.
                for i in range(len(new_arr) // 16 + 1):
                    if i * 16 < len(new_arr):
                        r_out1, r_out2, r_out3 = self.rnet(
                            new_arr[i * 16:(i + 1) * 16, :, :, :])
                        r_output1_list.append(r_out1)
                        r_output2_list.append(r_out2)
                        r_output3_list.append(r_out3)
                r_out1 = torch.cat(r_output1_list, dim=0)
                r_out2 = torch.cat(r_output2_list, dim=0)
                r_out3 = torch.cat(r_output3_list, dim=0)
            else:
                r_out1, r_out2, r_out3 = self.rnet(new_arr)

            probs = r_out1
            keep = probs[:, 0] > self.detection_threshould[1]
            r_out1 = r_out1[keep]
            boxes = boxes[keep]
            if len(boxes) == 0:
                return boxes
            # Column 4 now carries the R-Net score.
            boxes[:, 4] = r_out1[:, 0]
            r_out2 = r_out2[keep]
            boxes = calibrate_box(boxes, r_out2)

            # ---- R-Net finished ----
            boxes = self.boxes_nms(
                boxes, overlap_threshold=self.detection_threshould[1])
            if self.verbose:
                print('rnet:{0} boxes '.format(len(boxes)))
            boxes = clip_boxes_to_image(boxes, (img.shape[0], img.shape[1]))
            boxes = self.rerec(to_tensor(boxes), img.shape)

            # prepare onet input: same cropping, resized to 48x48
            new_arr = np.zeros((boxes.shape[0], 3, 48, 48))
            for k in range(boxes.shape[0]):
                box = boxes[k]
                crop_img = img.copy()[int(box[1]):int(box[3]),
                                      int(box[0]):int(box[2]), :]
                if crop_img.shape[0] > 0 and crop_img.shape[1] > 0:
                    new_arr[k] = resize((48, 48))(crop_img).transpose(
                        [2, 0, 1]) / 255.0
            new_arr = to_tensor(new_arr)

            o_out1, o_out2, o_out3 = self.onet(new_arr)
            probs = o_out1
            keep = probs[:, 0] > self.detection_threshould[2]
            o_out1 = o_out1[keep]
            boxes = boxes[keep]
            if len(boxes) == 0:
                return boxes
            # Column 4 now carries the O-Net score.
            boxes[:, 4] = o_out1[:, 0]
            o_out2 = o_out2[keep]
            o_out3 = o_out3[keep]
            boxes = calibrate_box(boxes, o_out2)

            # O-Net's third output is read as x values at even positions and
            # y values at odd positions, relative to the box; convert them to
            # image coordinates.
            landmarks_x = boxes[:, 0:1] + o_out3[:, 0::2] * (
                boxes[:, 2:3] - boxes[:, 0:1] + 1)
            landmarks_y = boxes[:, 1:2] + o_out3[:, 1::2] * (
                boxes[:, 3:4] - boxes[:, 1:2] + 1)
            boxes = torch.cat([boxes, landmarks_x, landmarks_y], dim=-1)

            # ---- O-Net finished ----
            boxes = self.boxes_nms(
                boxes, overlap_threshold=self.detection_threshould[2])
            if self.verbose:
                print('onet:{0} boxes '.format(len(boxes)))
            return to_numpy(boxes)
        else:
            return None
    else:
        raise ValueError('the model is not built yet.')