def clip_to_image(self, remove_empty=True):
    """Clamp box coordinates to the image extent; optionally drop boxes that become empty."""
    if self.jittor and not isinstance(self.bbox, jt.Var):
        self.to_jittor()
    if self.jittor:
        if self.bbox.numel() == 0:
            return self
        TO_REMOVE = 1
        self.bbox[:, 0] = jt.clamp(self.bbox[:, 0], min_v=0, max_v=self.size[0] - TO_REMOVE)
        self.bbox[:, 1] = jt.clamp(self.bbox[:, 1], min_v=0, max_v=self.size[1] - TO_REMOVE)
        self.bbox[:, 2] = jt.clamp(self.bbox[:, 2], min_v=0, max_v=self.size[0] - TO_REMOVE)
        self.bbox[:, 3] = jt.clamp(self.bbox[:, 3], min_v=0, max_v=self.size[1] - TO_REMOVE)
        if remove_empty:
            box = self.bbox
            keep = jt.logical_and(box[:, 3] > box[:, 1], box[:, 2] > box[:, 0])
            return self[keep]
    else:
        if self.bbox.size == 0:
            return self
        TO_REMOVE = 1
        self.bbox[:, 0] = np.clip(self.bbox[:, 0], 0, self.size[0] - TO_REMOVE)
        self.bbox[:, 1] = np.clip(self.bbox[:, 1], 0, self.size[1] - TO_REMOVE)
        self.bbox[:, 2] = np.clip(self.bbox[:, 2], 0, self.size[0] - TO_REMOVE)
        self.bbox[:, 3] = np.clip(self.bbox[:, 3], 0, self.size[1] - TO_REMOVE)
        if remove_empty:
            box = self.bbox
            keep = np.where((box[:, 3] > box[:, 1]) & (box[:, 2] > box[:, 0]))[0]
            return self[keep]
    return self

def clip_to_image(self, remove_empty=True):
    """Jittor-only variant: clamp box coordinates to the image extent and optionally drop empty boxes."""
    if not isinstance(self.bbox, jt.Var):
        self.to_jittor()
    if self.bbox.numel() == 0:
        return self
    TO_REMOVE = 1
    self.bbox[:, 0] = jt.clamp(self.bbox[:, 0], min_v=0, max_v=self.size[0] - TO_REMOVE)
    self.bbox[:, 1] = jt.clamp(self.bbox[:, 1], min_v=0, max_v=self.size[1] - TO_REMOVE)
    self.bbox[:, 2] = jt.clamp(self.bbox[:, 2], min_v=0, max_v=self.size[0] - TO_REMOVE)
    self.bbox[:, 3] = jt.clamp(self.bbox[:, 3], min_v=0, max_v=self.size[1] - TO_REMOVE)
    if remove_empty:
        box = self.bbox
        keep = jt.logical_and(box[:, 3] > box[:, 1], box[:, 2] > box[:, 0])
        return self[keep]
    return self

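# A minimal usage sketch of what clip_to_image does, written in plain numpy so it is
# self-contained; the BoxList container itself is not reproduced. The helper name
# `_sketch_clip`, the box values, and the image size are illustrative assumptions only.
def _sketch_clip(boxes, size):
    import numpy as np
    w, h = size
    clipped = boxes.copy()
    clipped[:, 0::2] = np.clip(clipped[:, 0::2], 0, w - 1)  # clamp x1, x2 into the image
    clipped[:, 1::2] = np.clip(clipped[:, 1::2], 0, h - 1)  # clamp y1, y2 into the image
    keep = (clipped[:, 2] > clipped[:, 0]) & (clipped[:, 3] > clipped[:, 1])  # drop empty boxes
    return clipped[keep]
# e.g. _sketch_clip(np.array([[-5., 10., 30., 40.], [60., 60., 20., 20.]]), (100, 80))
# keeps only the first box, clipped to [0., 10., 30., 40.].
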
def sample(self, sample_shape=()):
    # Inverse-CDF sampling: each uniform draw falls into exactly one cumulative-probability
    # bin (cum_probs_l, cum_probs_r]; the bin index is the sampled category.
    shape = sample_shape + self.probs.shape[:-1] + (1, )
    rand = jt.rand(shape)
    one_hot = jt.logical_and(self.cum_probs_l < rand, rand <= self.cum_probs_r)
    index = one_hot.index(one_hot.ndim - 1)
    return (one_hot * index).sum(-1)

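# A small numpy sketch of the inverse-CDF trick used by `sample` above: with
# cum_probs_r = cumsum(probs) and cum_probs_l = cum_probs_r - probs, each uniform draw
# falls into exactly one (cum_l, cum_r] bin, and that bin index is the sampled category.
# The helper name and arguments are illustrative, not part of the library.
def _sketch_categorical_sample(probs, n=5):
    import numpy as np
    probs = np.asarray(probs, dtype=np.float64)
    cum_r = np.cumsum(probs)   # right bin edges
    cum_l = cum_r - probs      # left bin edges
    rand = np.random.rand(n, 1)
    one_hot = (cum_l < rand) & (rand <= cum_r)  # exactly one True per row
    return one_hot.argmax(axis=1)               # sampled category index per draw
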
def remove_small_boxes(boxlist, min_size):
    """
    Only keep boxes with both sides >= min_size

    Arguments:
        boxlist (Boxlist)
        min_size (int)
    """
    # TODO maybe add an API for querying the ws / hs
    xywh_boxes = boxlist.convert("xywh").bbox
    _, _, ws, hs = xywh_boxes.unbind(dim=1)
    keep = jt.where(jt.logical_and(ws >= min_size, hs >= min_size))[0]
    return boxlist[keep]

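# Illustrative numpy version of the size filter above, assuming boxes are already in
# (x, y, w, h) form; the real function goes through boxlist.convert("xywh") first.
def _sketch_small_box_filter(xywh, min_size):
    import numpy as np
    ws, hs = xywh[:, 2], xywh[:, 3]
    return np.where((ws >= min_size) & (hs >= min_size))[0]  # indices of boxes to keep
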
def cross_entropy_loss(output, target, ignore_index=None):
    # Flatten [N, C, H, W] predictions to [N*H*W, C] so the loss is computed per pixel.
    if len(output.shape) == 4:
        c_dim = output.shape[1]
        output = output.transpose((0, 2, 3, 1))
        output = output.reshape((-1, c_dim))
    if ignore_index is not None:
        # Map ignored labels to -1 and remember which entries are valid.
        target = jt.ternary(target == ignore_index, jt.array(-1).broadcast(target), target)
        mask = jt.logical_and(target >= 0, target < output.shape[1])
    target = target.reshape((-1, ))
    # Build a one-hot mask selecting the target class in each row.
    target = target.broadcast(output, [1])
    target = target.index(1) == target
    # Numerically stable log-softmax cross entropy.
    output = output - output.max([1], keepdims=True)
    loss = output.exp().sum(1).log()
    loss = loss - (output * target).sum(1)
    if ignore_index is None:
        return loss.mean()
    else:
        return loss.sum() / jt.maximum(mask.int().sum(), 1)

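# A numpy reference for the loss above, for checking purposes only: after subtracting the
# row max, loss_i = log(sum_j exp(o_ij)) - o_{i, y_i}, i.e. the usual -log softmax(o_i)[y_i].
# The helper is an illustrative sketch, not part of the library API.
def _sketch_ce_reference(output, target):
    import numpy as np
    output = output - output.max(axis=1, keepdims=True)  # numerical stability
    lse = np.log(np.exp(output).sum(axis=1))              # log-sum-exp per row
    picked = output[np.arange(len(target)), target]       # logit of the true class
    return (lse - picked).mean()
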
def __call__(self, match_quality_matrix):
    """
    Args:
        match_quality_matrix (Tensor[float]): an MxN tensor, containing the
            pairwise quality between M ground-truth elements and N predicted elements.

    Returns:
        matches (Tensor[int64]): an N tensor where N[i] is a matched gt in
            [0, M - 1] or a negative value indicating that prediction i could not
            be matched.
    """
    if match_quality_matrix.numel() == 0:
        # empty targets or proposals not supported during training
        if match_quality_matrix.shape[0] == 0:
            raise ValueError(
                "No ground-truth boxes available for one of the images "
                "during training")
        else:
            raise ValueError(
                "No proposal boxes available for one of the images "
                "during training")

    # match_quality_matrix is M (gt) x N (predicted)
    # Max over gt elements (dim 0) to find best gt candidate for each prediction
    matches, matched_vals = match_quality_matrix.argmax(dim=0)
    if self.allow_low_quality_matches:
        all_matches = matches.clone()

    # Assign candidate matches with low quality to negative (unassigned) values
    below_low_threshold = matched_vals < self.low_threshold
    between_thresholds = jt.logical_and(matched_vals >= self.low_threshold,
                                        matched_vals < self.high_threshold)
    matches[below_low_threshold] = Matcher.BELOW_LOW_THRESHOLD
    matches[between_thresholds] = Matcher.BETWEEN_THRESHOLDS

    if self.allow_low_quality_matches:
        self.set_low_quality_matches_(matches, all_matches, match_quality_matrix)

    return matches

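# A toy numpy walk-through of the thresholding step above on an M x N quality matrix.
# The default sentinel values mirror Matcher.BELOW_LOW_THRESHOLD / Matcher.BETWEEN_THRESHOLDS;
# the thresholds and the helper name are illustrative assumptions.
def _sketch_matcher(quality, low=0.3, high=0.7, below_low=-1, between=-2):
    import numpy as np
    matched_vals = quality.max(axis=0)    # best quality (e.g. IoU) per prediction
    matches = quality.argmax(axis=0)      # index of the best gt per prediction
    matches[matched_vals < low] = below_low
    matches[(matched_vals >= low) & (matched_vals < high)] = between
    return matches
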
def sample(self, sample_shape=[]):
    # Same bin-membership trick as above, but returns the one-hot sample itself.
    shape = sample_shape + self.probs.shape[:-1] + (1, )
    rand = jt.rand(shape)
    one_hot = jt.logical_and(self.cum_probs_l < rand, rand <= self.cum_probs_r).float()
    return one_hot

def match(pos_thresh, neg_thresh, truths, priors, labels, crowd_boxes, loc_t,
          conf_t, idx_t, idx, loc_data):
    """Match each prior box with the ground truth box of the highest jaccard
    overlap, encode the bounding boxes, then return the matched indices
    corresponding to both confidence and location preds.

    Args:
        pos_thresh: (float) IoU > pos_thresh ==> positive.
        neg_thresh: (float) IoU < neg_thresh ==> negative.
        truths: (tensor) Ground truth boxes, Shape: [num_obj, 4].
        priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors, 4].
        labels: (tensor) All the class labels for the image, Shape: [num_obj].
        crowd_boxes: (tensor) All the crowd box annotations or None if there are none.
        loc_t: (tensor) Tensor to be filled w/ encoded location targets.
        conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds. Note: -1 means neutral.
        idx_t: (tensor) Tensor to be filled w/ the index of the matched gt box for each prior.
        idx: (int) current batch index.
        loc_data: (tensor) The predicted bbox regression coordinates for this batch.
    Return:
        The matched indices corresponding to 1) location and 2) confidence preds.
    """
    decoded_priors = decode(
        loc_data, priors, cfg.use_yolo_regressors
    ) if cfg.use_prediction_matching else point_form(priors)

    # Size [num_objects, num_priors]
    overlaps = jaccard(truths, decoded_priors) if not cfg.use_change_matching else change(
        truths, decoded_priors)

    # Size [num_priors] best ground truth for each prior
    best_truth_idx, best_truth_overlap = overlaps.argmax(dim=0)

    # We want to ensure that each gt gets used at least once so that we don't
    # waste any training data. In order to do that, find the max overlap anchor
    # with each gt, and force that anchor to use that gt.
    for _ in range(overlaps.shape[0]):
        # Find j, the gt with the highest overlap with a prior
        # In effect, this will loop through overlaps.shape[0] in a "smart" order,
        # always choosing the highest overlap first.
        best_prior_idx, best_prior_overlap = overlaps.argmax(dim=1)
        j, _ = best_prior_overlap.argmax(dim=0)

        # Find i, the highest overlap anchor with this gt
        i = best_prior_idx[j]

        # Set all other overlaps with i to be -1 so that no other gt uses it
        overlaps[:, i] = -1
        # Set all other overlaps with j to be -1 so that this loop never uses j again
        overlaps[j, :] = -1

        # Overwrite i's score to be 2 so it doesn't get thresholded ever
        best_truth_overlap[i] = 2
        # Set the gt to be used for i to be j, overwriting whatever was there
        best_truth_idx[i] = j

    matches = truths[best_truth_idx]            # Shape: [num_priors, 4]
    conf = labels[best_truth_idx] + 1           # Shape: [num_priors]

    conf[best_truth_overlap < pos_thresh] = -1  # label as neutral
    conf[best_truth_overlap < neg_thresh] = 0   # label as background

    # Deal with crowd annotations for COCO
    if crowd_boxes is not None and cfg.crowd_iou_threshold < 1:
        # Size [num_priors, num_crowds]
        crowd_overlaps = jaccard(decoded_priors, crowd_boxes, iscrowd=True)
        # Size [num_priors]
        best_crowd_idx, best_crowd_overlap = crowd_overlaps.argmax(1)
        # Set non-positives with crowd iou of over the threshold to be neutral.
        conf[jt.logical_and(conf <= 0, best_crowd_overlap > cfg.crowd_iou_threshold)] = -1

    loc = encode(matches, priors, cfg.use_yolo_regressors)

    loc_t[idx] = loc              # [num_priors, 4] encoded offsets to learn
    conf_t[idx] = conf            # [num_priors] top class label for each prior
    idx_t[idx] = best_truth_idx   # [num_priors] indices for lookup

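# `jaccard` is used above but not shown here; this is a minimal numpy sketch of pairwise IoU
# between [M, 4] and [N, 4] boxes in (x1, y1, x2, y2) form, for reference only. It ignores the
# `iscrowd` variant and any area convention the real implementation may use.
def _sketch_jaccard(a, b):
    import numpy as np
    x1 = np.maximum(a[:, None, 0], b[None, :, 0])
    y1 = np.maximum(a[:, None, 1], b[None, :, 1])
    x2 = np.minimum(a[:, None, 2], b[None, :, 2])
    y2 = np.minimum(a[:, None, 3], b[None, :, 3])
    inter = np.clip(x2 - x1, 0, None) * np.clip(y2 - y1, 0, None)  # intersection areas [M, N]
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    return inter / (area_a[:, None] + area_b[None, :] - inter)     # IoU matrix [M, N]
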
def build_targets(p, targets, model):
    # Build targets for compute_loss(), input targets(image,class,x,y,w,h)
    det = model.model[-1]  # Detect() module
    na, nt = det.na, targets.shape[0]  # number of anchors, targets
    tcls, tbox, indices, anch = [], [], [], []
    gain = jt.ones((7, ))  # normalized to gridspace gain
    ai = jt.index((na, ), dim=0).float().view(na, 1).repeat(1, nt)  # same as .repeat_interleave(nt)
    targets = jt.contrib.concat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices

    g = 0.5  # bias
    off = jt.array([
        [0, 0],
        # [1, 0], [0, 1], [-1, 0], [0, -1],  # j,k,l,m
        # [1, 1], [1, -1], [-1, 1], [-1, -1],  # jk,jm,lk,lm
    ]).float() * g  # offsets

    for i in range(det.nl):
        anchors = det.anchors[i]
        gain[2:6] = jt.array([p[i].shape[3], p[i].shape[2], p[i].shape[3], p[i].shape[2]])  # xyxy gain

        # Match targets to anchors
        t = targets * gain
        if nt:
            # Matches
            r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
            j = jt.maximum(r, 1. / r).max(2) < model.hyp['anchor_t']  # compare
            # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
            t = t[j]  # filter

            # Offsets
            gxy = t[:, 2:4]  # grid xy
            gxi = gain[jt.array([2, 3])] - gxy  # inverse
            jk = jt.logical_and((gxy % 1. < g), (gxy > 1.))
            lm = jt.logical_and((gxi % 1. < g), (gxi > 1.))
            j, k = jk[:, 0], jk[:, 1]
            l, m = lm[:, 0], lm[:, 1]
            j = jt.stack((jt.ones_like(j), ))
            t = t.repeat((off.shape[0], 1, 1))[j]
            offsets = (jt.zeros_like(gxy)[None] + off[:, None])[j]
        else:
            t = targets[0]
            offsets = 0

        # Define
        b = t[:, 0].int32()
        c = t[:, 1].int32()  # image, class
        gxy = t[:, 2:4]  # grid xy
        gwh = t[:, 4:6]  # grid wh
        gij = (gxy - offsets).int32()
        gi, gj = gij[:, 0], gij[:, 1]  # grid xy indices

        # Append
        a = t[:, 6].int32()  # anchor indices
        indices.append((b, a, gj.clamp(0, gain[3] - 1), gi.clamp(0, gain[2] - 1)))  # image, anchor, grid indices
        tbox.append(jt.contrib.concat((gxy - gij, gwh), 1))  # box
        anch.append(anchors[a])  # anchors
        tcls.append(c)  # class

    return tcls, tbox, indices, anch

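# Sketch of the anchor/target shape filter used above: a target matches an anchor when both
# wh ratios stay within a factor of `anchor_t` (model.hyp['anchor_t'], typically around 4.0).
# Plain numpy, with shapes [nt, 2] and [na, 2]; the helper name and default are illustrative only.
def _sketch_anchor_filter(target_wh, anchors, anchor_t=4.0):
    import numpy as np
    r = target_wh[:, None, :] / anchors[None, :, :]        # [nt, na, 2] wh ratios
    return np.maximum(r, 1.0 / r).max(axis=2) < anchor_t   # [nt, na] match mask
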