def __call__(self, target, width, height):
    res = []
    labels = []

    for obj in target.iter('object'):
        difficult = int(obj.find('difficult').text) == 1
        if not self.keep_difficult and difficult:
            continue

        name = obj.find('name').text.upper().strip()
        bbox = obj.find('bndbox')

        pts = ['xmin', 'ymin', 'xmax', 'ymax']
        bndbox = []
        for pt in pts:
            # VOC coordinates are 1-based; shift them to 0-based pixel indices
            cur_pt = int(bbox.find(pt).text) - 1
            bndbox.append(cur_pt)

        label_idx = self.class_to_ind[name]
        labels.append(label_idx)
        res += [bndbox]  # [xmin, ymin, xmax, ymax]

    # BoxList needs the boxes plus the image size, as at the other call sites;
    # the original BoxList(res) call dropped both the tensor conversion and the size
    target = BoxList(torch.tensor(res, dtype=torch.float32), (width, height), mode='xyxy')
    target.fields['labels'] = torch.tensor(labels)

    return target

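# A minimal usage sketch for the VOC-style parser above, assuming a
# hypothetical wrapper class VOCTransform that stores keep_difficult and
# class_to_ind; the XML shape follows Pascal VOC annotation files.
import xml.etree.ElementTree as ET

sample_xml = """
<annotation>
  <object>
    <name>dog</name>
    <difficult>0</difficult>
    <bndbox><xmin>48</xmin><ymin>240</ymin><xmax>195</xmax><ymax>371</ymax></bndbox>
  </object>
</annotation>
"""

transform = VOCTransform(keep_difficult=False, class_to_ind={'DOG': 0, 'CAT': 1})
target = transform(ET.fromstring(sample_xml), width=500, height=375)
print(target.box, target.fields['labels'])  # one box, label 0
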
def __getitem__(self, index):
    img, annot = super().__getitem__(index)

    # drop crowd regions; they are not used as individual box targets
    annot = [o for o in annot if o['iscrowd'] == 0]

    boxes = [o['bbox'] for o in annot]
    boxes = torch.as_tensor(boxes).reshape(-1, 4)
    # COCO stores boxes as [x, y, w, h]; convert to corner format
    target = BoxList(boxes, img.size, mode='xywh').convert('xyxy')

    classes = [o['category_id'] for o in annot]
    classes = [self.category2id[c] for c in classes]
    target.fields['labels'] = torch.tensor(classes)

    # clip returns the filtered BoxList (other call sites assign its result),
    # so keep what it returns
    target = target.clip(remove_empty=True)

    if self.transform is not None:
        img, target = self.transform(img, target)

    return img, target, index

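# What the mode='xywh' -> 'xyxy' conversion above boils down to, as a
# standalone sketch; some BoxList implementations also subtract a one-pixel
# TO_REMOVE offset from the max corner, which is ignored here.
import torch

def xywh_to_xyxy(boxes):
    # split the [x, y, w, h] columns and rebuild corner coordinates
    x, y, w, h = boxes.unbind(-1)
    return torch.stack([x, y, x + w, y + h], -1)

coco_box = torch.tensor([[10.0, 20.0, 30.0, 40.0]])
print(xywh_to_xyxy(coco_box))  # tensor([[10., 20., 40., 60.]])
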
def forward(self, image_list, feature_maps):
    grid_sizes = [feature_map.shape[-2:] for feature_map in feature_maps]
    anchors_over_all_feature_maps = self.grid_anchors(grid_sizes)

    anchors = []
    for image_height, image_width in image_list.sizes:
        anchors_in_image = []
        for anchors_per_feature_map in anchors_over_all_feature_maps:
            boxlist = BoxList(
                anchors_per_feature_map, (image_width, image_height), mode="xyxy"
            )
            self.add_visibility_to(boxlist)
            anchors_in_image.append(boxlist)
        anchors.append(anchors_in_image)

    return anchors

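# grid_anchors is not shown above; for a single feature map it typically tiles
# a set of base anchors across every stride-spaced grid position. A rough
# sketch (the function name, stride handling, and base_anchors format are
# assumptions, not the repo's code):
import torch

def grid_anchors_single_level(base_anchors, grid_h, grid_w, stride):
    shifts_x = torch.arange(0, grid_w * stride, step=stride, dtype=torch.float32)
    shifts_y = torch.arange(0, grid_h * stride, step=stride, dtype=torch.float32)
    shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing='ij')
    # one (x, y, x, y) shift per grid cell, broadcast against the base anchors
    shifts = torch.stack([shift_x.reshape(-1), shift_y.reshape(-1)] * 2, dim=1)
    return (shifts[:, None, :] + base_anchors[None, :, :]).reshape(-1, 4)
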
def select_over_scales(self, boxlists):
    results = []

    for boxlist in boxlists:
        scores = boxlist.fields['scores']
        labels = boxlist.fields['labels']
        box = boxlist.box

        result = []
        # run NMS separately for each foreground class
        for j in range(1, self.n_class):
            idx = (labels == j).nonzero().view(-1)
            score_j = scores[idx]
            box_j = box[idx, :].view(-1, 4)
            box_by_class = BoxList(box_j, boxlist.size, mode='xyxy')
            box_by_class.fields['scores'] = score_j
            box_by_class = boxlist_nms(box_by_class, score_j, self.nms_threshold)
            n_label = len(box_by_class)
            box_by_class.fields['labels'] = torch.full(
                (n_label,), j, dtype=torch.int64, device=scores.device
            )
            result.append(box_by_class)

        result = cat_boxlist(result)
        n_detection = len(result)

        # keep only the post_top_n highest-scoring detections per image,
        # using the k-th largest score as a threshold
        if n_detection > self.post_top_n > 0:
            scores = result.fields['scores']
            img_threshold, _ = torch.kthvalue(
                scores.cpu(), n_detection - self.post_top_n + 1
            )
            keep = scores >= img_threshold.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]

        results.append(result)

    return results

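# boxlist_nms above is the repo's helper; for 'xyxy' boxes it is roughly
# torchvision's NMS plus re-indexing, sketched here under that assumption:
import torch
from torchvision.ops import nms

def boxlist_nms_sketch(boxes_xyxy, scores, nms_threshold):
    # keep the indices that survive IoU suppression, highest score first
    keep = nms(boxes_xyxy, scores, nms_threshold)
    return boxes_xyxy[keep], scores[keep]
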
def forward_for_single_feature_map(self, box_cls, box_regression, centerness, anchors):
    N, _, H, W = box_cls.shape
    A = box_regression.size(1) // 4
    C = box_cls.size(1) // A

    # put in the same format as anchors
    box_cls = permute_and_flatten(box_cls, N, A, C, H, W)
    box_cls = box_cls.sigmoid()
    box_regression = permute_and_flatten(box_regression, N, A, 4, H, W)
    box_regression = box_regression.reshape(N, -1, 4)

    candidate_inds = box_cls > self.pre_nms_thresh
    pre_nms_top_n = candidate_inds.view(N, -1).sum(1)
    pre_nms_top_n = pre_nms_top_n.clamp(max=self.pre_nms_top_n)

    centerness = permute_and_flatten(centerness, N, A, 1, H, W)
    centerness = centerness.reshape(N, -1).sigmoid()

    # multiply the classification scores with centerness scores
    box_cls = box_cls * centerness[:, :, None]

    results = []
    for per_box_cls, per_box_regression, per_pre_nms_top_n, per_candidate_inds, per_anchors in zip(
        box_cls, box_regression, pre_nms_top_n, candidate_inds, anchors
    ):
        per_box_cls = per_box_cls[per_candidate_inds]
        per_box_cls, top_k_indices = per_box_cls.topk(per_pre_nms_top_n, sorted=False)

        per_candidate_nonzeros = per_candidate_inds.nonzero()[top_k_indices, :]
        per_box_loc = per_candidate_nonzeros[:, 0]
        per_class = per_candidate_nonzeros[:, 1] + 1  # shift past the background class

        detections = self.box_coder.decode(
            per_box_regression[per_box_loc, :].view(-1, 4),
            per_anchors.bbox[per_box_loc, :].view(-1, 4),
        )

        boxlist = BoxList(detections, per_anchors.size, mode="xyxy")
        boxlist.add_field("labels", per_class)
        boxlist.add_field("scores", torch.sqrt(per_box_cls))
        boxlist = boxlist.clip_to_image(remove_empty=False)
        boxlist = remove_small_boxes(boxlist, self.min_size)
        results.append(boxlist)

    return results

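# permute_and_flatten is not shown above; in maskrcnn-benchmark-style code it
# reorders a (N, A * C, H, W) head output into (N, H * W * A, C) so that
# predictions line up with the flattened anchor order:
def permute_and_flatten(layer, N, A, C, H, W):
    layer = layer.view(N, -1, C, H, W)
    layer = layer.permute(0, 3, 4, 1, 2)
    layer = layer.reshape(N, -1, C)
    return layer
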
def forward_single_feature_map(self, location, cls_pred, box_pred, center_pred, image_sizes):
    batch, channel, height, width = cls_pred.shape

    # reshape predictions to (batch, H * W, ...) so each row is one location
    cls_pred = cls_pred.permute(0, 2, 3, 1)
    cls_pred = cls_pred.reshape(batch, -1, channel).sigmoid()

    box_pred = box_pred.permute(0, 2, 3, 1)
    box_pred = box_pred.reshape(batch, -1, 4)

    center_pred = center_pred.permute(0, 2, 3, 1)
    center_pred = center_pred.reshape(batch, -1).sigmoid()

    candid_ids = cls_pred > self.threshold
    top_ns = candid_ids.view(batch, -1).sum(1)
    top_ns = top_ns.clamp(max=self.top_n)

    # down-weight classification scores by the centerness prediction
    cls_pred = cls_pred * center_pred[:, :, None]

    results = []

    for i in range(batch):
        cls_p = cls_pred[i]
        candid_id = candid_ids[i]
        cls_p = cls_p[candid_id]
        candid_nonzero = candid_id.nonzero()
        box_loc = candid_nonzero[:, 0]
        class_id = candid_nonzero[:, 1] + 1

        box_p = box_pred[i]
        box_p = box_p[box_loc]
        loc = location[box_loc]

        top_n = top_ns[i]

        if candid_id.sum().item() > top_n.item():
            cls_p, top_k_id = cls_p.topk(top_n, sorted=False)
            class_id = class_id[top_k_id]
            box_p = box_p[top_k_id]
            loc = loc[top_k_id]

        # (l, t, r, b) distances from each location back to box corners
        detections = torch.stack(
            [
                loc[:, 0] - box_p[:, 0],
                loc[:, 1] - box_p[:, 1],
                loc[:, 0] + box_p[:, 2],
                loc[:, 1] + box_p[:, 3],
            ],
            1,
        )

        height, width = image_sizes[i]
        boxlist = BoxList(detections, (int(width), int(height)), mode='xyxy')
        boxlist.fields['labels'] = class_id
        boxlist.fields['scores'] = torch.sqrt(cls_p)
        boxlist = boxlist.clip(remove_empty=False)
        boxlist = remove_small_box(boxlist, self.min_size)

        results.append(boxlist)

    return results

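# The `location` argument above holds the (x, y) input-image coordinates of
# every feature-map cell; FCOS-style code builds it roughly as below (the
# function name and stride handling are assumptions about the surrounding code):
import torch

def compute_locations_sketch(height, width, stride, device='cpu'):
    shifts_x = torch.arange(0, width * stride, step=stride, dtype=torch.float32, device=device)
    shifts_y = torch.arange(0, height * stride, step=stride, dtype=torch.float32, device=device)
    shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x, indexing='ij')
    # offset by stride // 2 so each location sits at its cell's center
    return torch.stack((shift_x.reshape(-1), shift_y.reshape(-1)), dim=1) + stride // 2
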
def __init__(self, margin=0.1):
    BoxList.__init__(self)
    self.margin = margin

def __init__(self, columns, margin=0.1):
    BoxList.__init__(self)
    self.columns = columns
    self.margin = margin

def forward(self, input, image_sizes=None, targets=None):
    features = self.backbone(input)
    features = features.view(features.size(0), -1)

    matrix = self.fc1(features.relu())
    matrix = self.fc2(matrix.relu())
    A, B = self.fc3(matrix.relu()), self.fc4(matrix.relu())
    A = A.view(A.size(0), self.size, self.size)
    B = B.view(B.size(0), self.limit, self.limit)

    # square each matrix batch-wise (A @ A, B @ B)
    A = torch.einsum('bcd,bde->bce', A, A)
    B = torch.einsum('bcd,bde->bce', B, B)

    if self.training:
        # encode each image's boxes and one-hot labels as a set of vectors
        boxes = [t.box / 500 for t in targets if t.box.numel() > 0]
        labels = [
            F.one_hot(t.fields['labels'] - 1, self.config.n_class - 1).float() * 3
            for t in targets
            if t.box.numel() > 0
        ]
        vectors = [torch.cat([b, l], -1)[:self.limit] for b, l in zip(boxes, labels)]

        old_vectors = vectors
        vectors = [self.enc2(self.enc1(v).relu()) for v in vectors]
        rec_vectors = [self.dec2(self.dec1(v.relu()).relu()) for v in vectors]
        loss_rec = torch.stack(
            [self.crit(o, r) for o, r in zip(old_vectors, rec_vectors)], 0
        ).mean()

        del boxes, labels

        # polar decomposition via the SVD: M = (u vh)(vhᵀ s vh), with every
        # factor zero-padded to a fixed (limit, limit) shape
        svd = [LA.svd(m, full_matrices=False) for m in vectors]
        svd = [
            (
                pad(u, (self.limit, self.limit)),
                pad(torch.diag(s), (self.limit, self.limit)),
                pad(vh, (self.limit, self.limit)),
            )
            for u, s, vh in svd
        ]
        U, P = zip(*[(u @ vh, vh.transpose(0, 1) @ s @ vh) for u, s, vh in svd])

        P = torch.stack(P, 0)
        loss_herm = self.crit(A, P)

        # penalize the magnitude of U B Uᵀ relative to the scale of its diagonal
        D = [U[i] @ B[i] @ U[i].transpose(0, 1) for i in range(len(U))]
        loss_unit = torch.stack(
            [self.crit(d, torch.zeros_like(d)) / torch.diag(d).square().mean() for d in D], 0
        ).mean()

        losses = {'loss_herm': loss_herm, 'loss_unit': loss_unit, 'loss_rec': loss_rec}

        return None, losses

    else:
        w, v = LA.eigh(B)
        # batched transpose of the eigenvector matrices; the original
        # transpose(-1, 2) was a no-op on a 3-d tensor
        v = torch.einsum('bcd,bde->bce', v.transpose(-2, -1), A).abs()
        v = v.view(-1, self.limit)
        v = self.dec2(self.dec1(v.relu()).relu())
        v = v.view(-1, self.limit, self.limit)

        b = v[:, :, :4] * 500  # undo the /500 normalization applied in training
        # the decoded layout is [4 box coords, n_class - 1 class scores]
        # (see the training-time cat), so slice past the box coordinates
        l = v[:, :, 4:4 + (self.config.n_class - 1)]
        l = l.argmax(-1)

        boxes = []
        for i in range(w.size(0)):
            keep = w[i] > 0
            box = BoxList(b[i][keep], image_sizes[i])
            box.fields['labels'] = l[i][keep] + 1
            box.fields['scores'] = w[i][keep]
            boxes.append(box)

        return boxes, None

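# `pad` above zero-pads a 2-d tensor out to a fixed (rows, cols) shape so the
# SVD factors of variable-sized box sets can be stacked; a minimal sketch
# inferred from the call sites (the real helper may differ):
import torch.nn.functional as F

def pad(m, size):
    rows, cols = size
    # F.pad pads the last two dims as (left, right, top, bottom)
    return F.pad(m, (0, cols - m.size(1), 0, rows - m.size(0)))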