def test_clip_area_0_degree(self):
        for _ in range(50):
            num_boxes = 100
            boxes_5d = torch.zeros(num_boxes, 5)
            boxes_5d[:, 0] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
            boxes_5d[:, 1] = torch.FloatTensor(num_boxes).uniform_(-100, 500)
            boxes_5d[:, 2] = torch.FloatTensor(num_boxes).uniform_(0, 500)
            boxes_5d[:, 3] = torch.FloatTensor(num_boxes).uniform_(0, 500)
            # Convert from (x_ctr, y_ctr, w, h, 0) to (x1, y1, x2, y2)
            boxes_4d = torch.zeros(num_boxes, 4)
            boxes_4d[:, 0] = boxes_5d[:, 0] - boxes_5d[:, 2] / 2.0
            boxes_4d[:, 1] = boxes_5d[:, 1] - boxes_5d[:, 3] / 2.0
            boxes_4d[:, 2] = boxes_5d[:, 0] + boxes_5d[:, 2] / 2.0
            boxes_4d[:, 3] = boxes_5d[:, 1] + boxes_5d[:, 3] / 2.0

            image_size = (500, 600)
            test_boxes_4d = Boxes(boxes_4d)
            test_boxes_5d = RotatedBoxes(boxes_5d)
            # Before clip
            areas_4d = test_boxes_4d.area()
            areas_5d = test_boxes_5d.area()
            self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5))
            # After clip
            test_boxes_4d.clip(image_size)
            test_boxes_5d.clip(image_size)
            areas_4d = test_boxes_4d.area()
            areas_5d = test_boxes_5d.area()
            self.assertTrue(torch.allclose(areas_4d, areas_5d, atol=1e-1, rtol=1e-5))
def generate_proposals(images, model, score_threshold=0):
    # Assumes a Detectron2 GeneralizedRCNN-style model and that torch and
    # detectron2.structures.Boxes / Instances are imported at module level.
    # Each image is an HWC numpy array in the channel order the model expects.
    inputs = [
        {
            "image": torch.as_tensor(image.astype("float32").transpose(2, 0, 1)),
            "height": image.shape[0],
            "width": image.shape[1],
        }
        for image in images
    ]

    with torch.no_grad():
        images = model.preprocess_image(inputs)
        features = model.backbone(images.tensor)
        proposals, _ = model.proposal_generator(images, features, None)

        features_ = [features[f] for f in model.roi_heads.box_in_features]
        box_features = model.roi_heads.box_pooler(
            features_, [x.proposal_boxes for x in proposals])
        box_features = model.roi_heads.box_head(box_features)
        proposals_scores, proposals_deltas = model.roi_heads.box_predictor(
            box_features)

        boxes_tensors = model.roi_heads.box_predictor.predict_boxes(
            (proposals_scores, proposals_deltas), proposals)
        scores = model.roi_heads.box_predictor.predict_probs(
            (proposals_scores, proposals_deltas), proposals)

        result = []
        for i in range(len(inputs)):
            image_size = proposals[i].image_size
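            # predict_boxes returns one box per (proposal, class) pair; clip them to
            # the image, then reshape to (num_proposals, num_bbox_reg_classes, 4).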
            num_bbox_reg_classes = boxes_tensors[i].shape[1] // 4
            boxes = Boxes(boxes_tensors[i].reshape(-1, 4))
            boxes.clip(image_size)
            boxes = boxes.tensor.view(-1, num_bbox_reg_classes, 4)

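            # predict_probs includes the background class as the last column;
            # drop it before taking the per-box maximum score.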
            img_scores = scores[i][:, :-1]
            max_scores, pred_classes = torch.max(img_scores, dim=1)

            keep_mask = max_scores > score_threshold
            filtered_scores = img_scores[keep_mask, :]
            filtered_max_scores = max_scores[keep_mask]
            filtered_pred_classes = pred_classes[keep_mask]
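            # For each surviving proposal, keep the regressed box of its predicted class.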
            boxes = boxes[keep_mask, filtered_pred_classes, :]

            result_instance = Instances(image_size)
            result_instance.pred_boxes = Boxes(boxes)
            result_instance.scores = filtered_max_scores
            result_instance.pred_classes = filtered_pred_classes
            result_instance.class_distributions = filtered_scores
            result.append(result_instance)

    return result
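
# Usage sketch for generate_proposals (illustrative only): it assumes a standard
# Detectron2 GeneralizedRCNN from the model zoo; the config name, weights, input
# path, and threshold below are example values, not part of the original code.
if __name__ == "__main__":
    import cv2
    from detectron2 import model_zoo
    from detectron2.checkpoint import DetectionCheckpointer
    from detectron2.config import get_cfg
    from detectron2.modeling import build_model

    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
    model = build_model(cfg)
    DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
    model.eval()

    image = cv2.imread("input.jpg")  # HWC, BGR uint8, as expected by the default cfg
    instances = generate_proposals([image], model, score_threshold=0.05)
    print(f"kept {len(instances[0])} boxes for the first image")
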
    def forward(self, features, all_phrase_ids, targets, precomp_boxes, precomp_score,
                precomp_det_label, image_scale, all_sent_sgs, all_sentences, image_unique_id, det_label_embedding):

        """
        :param obj_proposals: proposal from each images
        :param features: features maps from the backbone
        :param target: gt relation labels
        :param object_vocab, object_vocab_len [[xxx,xxx],[xxx],[xxx]], [2,1,1]
        :param sent_sg: sentence scene graph
        :return: prediction, loss

        note that first dimension is images
        """
        img_num_per_gpu = len(features)

        batch_decode_logits = []
        batch_topk_decoder_logits = []
        batch_pred_similarity = []
        batch_precomp_boxes = []
        batch_topk_precomp_boxes = []
        batch_pred_boxes = []
        batch_topk_pred_boxes = []
        batch_topk_fusion_pred_boxes = []
        batch_topk_pred_similarity = []
        batch_topk_fusion_similarity = []
        batch_boxes_targets = []
        batch_ctx_embed = []
        batch_ctx_s1_embed = []

        batch_pred_targets = []
        batch_topk_pred_targets = []


        """ Language Embedding"""
        batch_phrase_ids, batch_phrase_types, batch_phrase_embed, batch_phrase_len, \
        batch_phrase_dec_ids, batch_phrase_mask, batch_decoder_word_embed, batch_phrase_glove_embed, batch_rel_phrase_embed, batch_relation_conn, batch_sent_embed,\
        batch_decoder_rel_word_embed, batch_rel_mask, batch_rel_dec_idx = self.phrase_embed(all_sentences, all_phrase_ids, all_sent_sgs)

        h, w = features.shape[-2:]

        # self.storage = get_event_storage()


        for bid in range(img_num_per_gpu):

            """ Visual Embedding """
            precomp_boxes_bid = precomp_boxes[bid].to(self.device)  ## 100*4

            order = []
            for phr_ids in batch_phrase_ids[bid]:
                order.append(all_phrase_ids[bid].index(phr_ids))
            target_filter = targets[bid][np.array(order)]
            batch_boxes_targets.append(target_filter.to(self.device))
            batch_precomp_boxes.append(precomp_boxes_bid)

            img_feat_bid = features[[bid]]
            visual_features_bid = self.rcnn_top(self.det_roi_pooler([img_feat_bid], [precomp_boxes_bid])).mean(dim=[2, 3]).contiguous()
            if cfg.MODEL.VG.SPATIAL_FEAT:
                spa_feat = meshgrid_generation(h, w)
                spa_feat = self.det_roi_pooler([spa_feat], [precomp_boxes_bid]).view(visual_features_bid.shape[0], -1)
                spa_feat = self.spatial_trans(spa_feat)
                visual_features_bid = torch.cat((visual_features_bid, spa_feat), dim=1)

            visual_features_bid = self.visual_embedding(visual_features_bid)
            visual_features_bid = self.vis_batchnorm(visual_features_bid)

            """ Noun Phrase embedding """
            phrase_embed_bid = batch_phrase_embed[bid]
            if phrase_embed_bid.shape[0] == 1 and self.training:
                phrase_embed_bid = self.phr_batchnorm(phrase_embed_bid.repeat(2,1))[[0]]
            else:
                phrase_embed_bid = self.phr_batchnorm(phrase_embed_bid)


            """ Similarity and attention prediction """
            num_box = precomp_boxes_bid.tensor.size(0)
            num_phrase = phrase_embed_bid.size(0)
            phr_inds, obj_inds = self.make_pair(num_phrase, num_box)
            pred_similarity_bid, pred_targets_bid = self.similarity(visual_features_bid, phrase_embed_bid, obj_inds, phr_inds)
            pred_similarity_bid = pred_similarity_bid.reshape(num_phrase, num_box)
            pred_targets_bid = pred_targets_bid.reshape(num_phrase, num_box, 4)
            batch_pred_targets.append(pred_targets_bid)


            if cfg.MODEL.VG.USING_DET_KNOWLEDGE:
                det_label_embedding_bid = det_label_embedding[bid].to(self.device)
                sim = self.cal_det_label_sim_max(det_label_embedding_bid, batch_phrase_glove_embed[bid])
                pred_similarity_bid = pred_similarity_bid * sim
                sim_mask = (sim > 0).float()
                atten_bid = numerical_stability_masked_softmax(pred_similarity_bid, sim_mask, dim=1)
            else:
                atten_bid = F.softmax(pred_similarity_bid, dim=1)

            ## reconstruction visual features
            visual_reconst_bid = torch.mm(atten_bid, visual_features_bid)
            decode_phr_logits = self.phrase_decoder(visual_reconst_bid, batch_decoder_word_embed[bid])
            batch_decode_logits.append(decode_phr_logits)

            atten_score_topk, atten_ranking_topk = torch.topk(atten_bid, dim=1, k=self.s2_topk) ## (N, 10)
            ind_phr_topk = np.arange(num_phrase).repeat(self.s2_topk)


            ## -----------------------------------------------------##
            ## crop 2nd-stage (top-k) features
            ## -----------------------------------------------------##

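            # Before REG_START_ITER the top-k proposals are used as-is; afterwards
            # they are refined with the stage-1 regression deltas before re-pooling.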
            if self.storage.iter <= cfg.SOLVER.REG_START_ITER:
                visual_features_topk_bid = visual_features_bid[atten_ranking_topk.reshape(-1)]
                precomp_boxes_topk_bid = precomp_boxes_bid[atten_ranking_topk.reshape(-1)]
                batch_topk_precomp_boxes.append(precomp_boxes_topk_bid)
            else:
                topk_box_ids = atten_ranking_topk.reshape(-1) + torch.as_tensor(ind_phr_topk, dtype=torch.long).to(self.device)*num_box
                precomp_boxes_tensor, box_size = precomp_boxes_bid.tensor, precomp_boxes_bid.size
                precomp_boxes_topk_tensor = precomp_boxes_tensor[atten_ranking_topk.reshape(-1)]  ## (N*10, 4)
                pred_targets_s0 = pred_targets_bid.view(-1, 4)[topk_box_ids]
                precomp_boxes_topk_bid = self.box2box_translation.apply_deltas(pred_targets_s0, precomp_boxes_topk_tensor)
                precomp_boxes_topk_bid = Boxes(precomp_boxes_topk_bid, box_size)
                precomp_boxes_topk_bid.clip()
                batch_topk_precomp_boxes.append(precomp_boxes_topk_bid)
                visual_features_topk_bid = self.rcnn_top(self.det_roi_pooler([img_feat_bid], [precomp_boxes_topk_bid])).mean(dim=[2, 3]).contiguous()

                if cfg.MODEL.VG.SPATIAL_FEAT:
                    spa_feat = meshgrid_generation(h, w)
                    spa_feat = self.det_roi_pooler([spa_feat], [precomp_boxes_topk_bid]).view(visual_features_topk_bid.shape[0], -1)
                    spa_feat = self.spatial_trans(spa_feat)
                    visual_features_topk_bid = torch.cat((visual_features_topk_bid, spa_feat), dim=1)

                visual_features_topk_bid = self.visual_embedding(visual_features_topk_bid)  ## (N*10, 1024)
                visual_features_topk_bid = self.vis_batchnorm(visual_features_topk_bid)


            pred_similarity_topk_bid, pred_targets_topk_bid = self.similarity_topk(visual_features_topk_bid, phrase_embed_bid, ind_phr_topk)
            pred_similarity_topk_bid = pred_similarity_topk_bid.reshape(num_phrase, self.s2_topk)
            pred_targets_topk_bid = pred_targets_topk_bid.reshape(num_phrase, self.s2_topk, 4)
            batch_topk_pred_targets.append(pred_targets_topk_bid)


            if cfg.MODEL.VG.USING_DET_KNOWLEDGE:
                sim_topk = torch.gather(sim, dim=1, index=atten_ranking_topk.long())
                sim_mask = (sim_topk>0).float()
                pred_similarity_topk_bid = pred_similarity_topk_bid * sim_topk
                atten_topk_bid = numerical_stability_masked_softmax(pred_similarity_topk_bid, sim_mask, dim=1)
            else:
                atten_topk_bid = F.softmax(pred_similarity_topk_bid, dim=1)

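            # Fuse stage-1 and stage-2 attention, then use the fused weights to pool
            # the top-k visual features for phrase reconstruction.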
            atten_fusion = atten_topk_bid * atten_score_topk  ## N*10
            visual_features_topk_bid = visual_features_topk_bid.view(num_phrase, self.s2_topk, -1)
            visual_reconst_topk_bid = (atten_fusion.unsqueeze(2)*visual_features_topk_bid).sum(1) ## N*1024
            decoder_phr_topk_logits = self.phrase_decoder(visual_reconst_topk_bid, batch_decoder_word_embed[bid])
            batch_topk_decoder_logits.append(decoder_phr_topk_logits)


            ## construct the discriminative loss
            batch_ctx_s1_embed.append(self.visual_mlp(visual_reconst_bid.mean(0, keepdim=True)))
            batch_ctx_embed.append(self.visual_mlp(visual_reconst_topk_bid.mean(0, keepdim=True)))


            batch_pred_similarity.append(atten_bid)
            batch_topk_pred_similarity.append(atten_topk_bid)
            batch_topk_fusion_similarity.append(atten_fusion)

            ### transform boxes for stage-1
            num_phrase_indices = torch.arange(num_phrase).long().to(self.device)
            max_box_ind = atten_bid.detach().cpu().numpy().argmax(1)
            precomp_boxes_delta_max = pred_targets_bid[num_phrase_indices, max_box_ind] ## numPhrase*4

            max_topk_id = torch.topk(atten_topk_bid, dim=1, k=1)[1].long().squeeze(1)
            precomp_boxes_delta_max_topk = pred_targets_topk_bid[num_phrase_indices, max_topk_id]  ## num_phrase*4
            precomp_boxes_topk_bid_tensor = precomp_boxes_topk_bid.tensor.reshape(-1, self.s2_topk, 4)

            max_fusion_topk_id = torch.topk(atten_fusion, dim=1, k=1)[1].long().squeeze()
            precomp_boxes_delta_max_topk_fusion = pred_targets_topk_bid[num_phrase_indices, max_fusion_topk_id]  ## num_phrase*4

            phr_index = torch.arange(num_phrase).to(self.device) * self.s2_topk

            if self.storage.iter <= cfg.SOLVER.REG_START_ITER:
                max_select_boxes = precomp_boxes_bid[max_box_ind]
                max_precomp_boxes = precomp_boxes_topk_bid[max_topk_id + phr_index]
                max_fusion_precomp_boxes = precomp_boxes_topk_bid[max_fusion_topk_id + phr_index]
            else:
                max_select_boxes = Boxes(self.box2box_translation.apply_deltas(precomp_boxes_delta_max, precomp_boxes_bid[max_box_ind].tensor), precomp_boxes_bid.size)
                max_precomp_boxes = Boxes(self.box2box_translation.apply_deltas(precomp_boxes_delta_max_topk, precomp_boxes_topk_bid_tensor[num_phrase_indices, max_topk_id]), precomp_boxes_bid.size)
                max_fusion_precomp_boxes = Boxes(self.box2box_translation.apply_deltas(precomp_boxes_delta_max_topk_fusion, precomp_boxes_topk_bid_tensor[num_phrase_indices, max_fusion_topk_id]), precomp_boxes_bid.size)

            batch_pred_boxes.append(max_select_boxes)
            batch_topk_pred_boxes.append(max_precomp_boxes)
            batch_topk_fusion_pred_boxes.append(max_fusion_precomp_boxes)


        batch_ctx_sim, batch_ctx_sim_s1 = self.generate_image_sent_discriminative(batch_sent_embed, batch_ctx_embed, batch_ctx_s1_embed)

        noun_reconst_loss, noun_topk_reconst_loss, disc_img_sent_loss_s1, disc_img_sent_loss_s2,  reg_loss, \
        reg_loss_s1 = self.VGLoss(batch_phrase_mask, batch_decode_logits, batch_topk_decoder_logits, batch_phrase_dec_ids,
                                  batch_ctx_sim, batch_ctx_sim_s1, batch_pred_similarity, batch_topk_pred_similarity, batch_boxes_targets, batch_precomp_boxes,
                                  batch_pred_targets, batch_topk_pred_targets,
                                  batch_topk_precomp_boxes)

        all_loss = dict(noun_reconst_loss=noun_reconst_loss, noun_topk_reconst_loss=noun_topk_reconst_loss, disc_img_sent_loss_s1=disc_img_sent_loss_s1,
                        disc_img_sent_loss_s2=disc_img_sent_loss_s2, reg_loss_s1=reg_loss, reg_loss_s2=reg_loss_s1)


        if self.training:
            return all_loss, None
        else:
            return all_loss, (batch_phrase_ids, batch_phrase_types, move2cpu(batch_pred_boxes), move2cpu(batch_pred_similarity),
                              move2cpu(batch_boxes_targets), move2cpu(batch_precomp_boxes), image_unique_id, move2cpu(batch_topk_pred_similarity),
                              move2cpu(batch_topk_fusion_similarity), move2cpu(batch_topk_pred_boxes), move2cpu(batch_topk_fusion_pred_boxes),
                              move2cpu(batch_topk_precomp_boxes), move2cpu(batch_topk_pred_targets), move2cpu(batch_pred_targets))