Exemplo n.º 1
0
Arquivo: m4c.py Projeto: hahaxun/mmf
    def _build_ocr_encoding(self):
        """Create the sub-modules used to encode OCR tokens.

        Stores five optional ``remove_ocr_*`` flags read from
        ``self.config.ocr`` (each defaults to ``False`` when absent), then
        builds the OCR appearance encoder, the linear projections into the
        MMT hidden size, the layer norms and the dropout layer. The flags
        are only recorded here; this method does not act on them.
        """
        ocr_cfg = self.config.ocr

        # Optional switches; a missing entry means the feature is kept.
        for flag_name in (
            "remove_ocr_fasttext",
            "remove_ocr_phoc",
            "remove_ocr_frcn",
            "remove_ocr_semantics",
            "remove_ocr_bbox",
        ):
            setattr(self, flag_name, getattr(ocr_cfg, flag_name, False))

        # OCR appearance feature: Faster R-CNN
        encoder_config = self._build_encoder_config()
        self.ocr_faster_rcnn_fc7 = build_image_encoder(
            encoder_config, direct_features=True
        )
        # Registered for fine-tuning with its own learning-rate scale.
        finetune_entry = {
            "module": self.ocr_faster_rcnn_fc7,
            "lr_scale": self.config.lr_scale_frcn,
        }
        self.finetune_modules.append(finetune_entry)

        ocr_in_dim = ocr_cfg.mmt_in_dim
        hidden_size = self.mmt_config.hidden_size
        self.linear_ocr_feat_to_mmt_in = nn.Linear(ocr_in_dim, hidden_size)

        # OCR location feature: relative bounding box coordinates (4-dim)
        self.linear_ocr_bbox_to_mmt_in = nn.Linear(4, hidden_size)

        self.ocr_feat_layer_norm = nn.LayerNorm(hidden_size)
        self.ocr_bbox_layer_norm = nn.LayerNorm(hidden_size)
        self.ocr_drop = nn.Dropout(ocr_cfg.dropout_prob)
Exemplo n.º 2
0
    def _init_feature_encoders(self, attr: str):
        """Build the single feature encoder configured for ``attr``.

        Reads ``<attr>_feature_encodings`` and ``<attr>_feature_dim`` from
        ``self.config``, builds an encoder from a deep copy of that config
        (so the shared config is not mutated), and stores the encoder as
        ``self.<attr>_feature_encoders``. ``self.<attr>_feature_dim`` ends
        up holding the encoder's ``out_dim``.

        Args:
            attr: Prefix used to form the config keys and attribute names.
        """
        dim_attr_name = attr + "_feature_dim"
        encoder_template = self.config[attr + "_feature_encodings"]
        input_dim = self.config[dim_attr_name]
        # Record the configured input size first; it is replaced by the
        # encoder's output size once the encoder has been built.
        setattr(self, dim_attr_name, input_dim)

        encoder_config = copy.deepcopy(encoder_template)
        # open_dict lets new keys be added to the copied config.
        with omegaconf.open_dict(encoder_config):
            encoder_config.params.model_data_dir = self.config.model_data_dir
            encoder_config.params.in_dim = input_dim

        encoder = build_image_encoder(encoder_config, direct_features=True)

        setattr(self, dim_attr_name, encoder.out_dim)
        setattr(self, attr + "_feature_encoders", encoder)
Exemplo n.º 3
0
    def _init_feature_projectors(self, attr):
        """Build the list of feature projectors configured for ``attr``.

        Each entry of ``self.config[<attr>_feature_projections]`` is deep
        copied, given ``params.in_dim`` equal to the value of
        ``self.<attr>_feature_dim`` at the time this method is entered, and
        turned into a module via ``build_image_encoder``. The modules are
        stored as an ``nn.ModuleList`` on ``self.<attr>_feature_projectors``
        and ``self.<attr>_feature_dim`` is updated to the ``out_dim`` of the
        most recently built projector.

        Args:
            attr: Prefix used to form the config key and attribute names.
        """
        dim_attr_name = attr + "_feature_dim"
        projector_configs = self.config[attr + "_feature_projections"]
        # Captured once: every projector receives this same input size.
        input_dim = getattr(self, dim_attr_name)

        built_projectors = []
        for template in projector_configs:
            projector_config = copy.deepcopy(template)
            projector_config.params.in_dim = input_dim

            projector = build_image_encoder(
                projector_config, direct_features=True
            )
            built_projectors.append(projector)
            setattr(self, dim_attr_name, projector.out_dim)

        projector_list = nn.ModuleList(built_projectors)
        setattr(self, attr + "_feature_projectors", projector_list)
Exemplo n.º 4
0
    def _init_feature_encoders(self, attr):
        """Build the list of feature encoders configured for ``attr``.

        Each entry of ``self.config[<attr>_feature_encodings]`` is deep
        copied, extended with ``params.model_data_dir`` and ``params.in_dim``
        (the configured ``<attr>_feature_dim``), and turned into a module
        via ``build_image_encoder``. The modules are stored as an
        ``nn.ModuleList`` on ``self.<attr>_feature_encoders`` and
        ``self.<attr>_feature_dim`` is updated to the ``out_dim`` of the
        most recently built encoder.

        Args:
            attr: Prefix used to form the config keys and attribute names.
        """
        dim_attr_name = attr + "_feature_dim"
        encoder_configs = self.config[attr + "_feature_encodings"]
        # Every encoder receives this same configured input size.
        input_dim = self.config[dim_attr_name]
        setattr(self, dim_attr_name, input_dim)

        built_encoders = []
        for template in encoder_configs:
            encoder_config = copy.deepcopy(template)
            # open_dict lets new keys be added to the copied config.
            with omegaconf.open_dict(encoder_config):
                encoder_config.params.model_data_dir = (
                    self.config.model_data_dir
                )
                encoder_config.params.in_dim = input_dim

            encoder = build_image_encoder(encoder_config, direct_features=True)
            built_encoders.append(encoder)
            setattr(self, dim_attr_name, encoder.out_dim)

        encoder_list = nn.ModuleList(built_encoders)
        setattr(self, attr + "_feature_encoders", encoder_list)
Exemplo n.º 5
0
Arquivo: m4c.py Projeto: hahaxun/mmf
    def _build_obj_encoding(self):
        """Create the sub-modules used to encode detected objects.

        Builds the object appearance encoder and registers it in
        ``self.finetune_modules`` with the ``lr_scale_frcn`` learning-rate
        scale, then creates the linear projections into the MMT hidden
        size, the layer norms and the dropout layer.
        """
        obj_cfg = self.config.obj

        # object appearance feature: Faster R-CNN
        encoder_config = self._build_encoder_config()
        self.obj_faster_rcnn_fc7 = build_image_encoder(
            encoder_config, direct_features=True
        )
        # apply smaller lr to pretrained Faster R-CNN fc7
        finetune_entry = {
            "module": self.obj_faster_rcnn_fc7,
            "lr_scale": self.config.lr_scale_frcn,
        }
        self.finetune_modules.append(finetune_entry)

        obj_in_dim = obj_cfg.mmt_in_dim
        hidden_size = self.mmt_config.hidden_size
        self.linear_obj_feat_to_mmt_in = nn.Linear(obj_in_dim, hidden_size)

        # object location feature: relative bounding box coordinates (4-dim)
        self.linear_obj_bbox_to_mmt_in = nn.Linear(4, hidden_size)

        self.obj_feat_layer_norm = nn.LayerNorm(hidden_size)
        self.obj_bbox_layer_norm = nn.LayerNorm(hidden_size)
        self.obj_drop = nn.Dropout(obj_cfg.dropout_prob)
Exemplo n.º 6
0
 def _build_modal_encoder(self, config):
     """Build an image encoder from ``config``.

     ``self._is_direct_features_input`` is forwarded as the
     ``direct_features`` argument of ``build_image_encoder``.
     """
     use_direct_features = self._is_direct_features_input
     encoder = build_image_encoder(config, direct_features=use_direct_features)
     return encoder