Example #1
    def get_scalars_with_smooth(self, window_size=30):
        result = imixEasyDict()
        for key, value in self._scalars.items():
            # Replace scalars flagged for smoothing with the median over
            # their last `window_size` history entries; pass raw values
            # through unchanged.
            if self._smoothing_hints[key]:
                result[key] = self._history[key].median(window_size)
            else:
                result[key] = value

        return result
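All of these examples rely on imixEasyDict, an attribute-accessible dict from imix.utils.config. A minimal sketch of the assumed behaviour, modelled on the popular easydict package (a hypothetical re-implementation, not the actual imix class):

    # Hypothetical minimal re-implementation of the assumed imixEasyDict
    # behaviour: a dict whose keys are also readable and writable as
    # attributes, with nested dicts converted recursively.
    class imixEasyDict(dict):

        def __init__(self, d=None, **kwargs):
            super().__init__()
            for k, v in {**(d or {}), **kwargs}.items():
                self[k] = v

        def __setitem__(self, key, value):
            # Convert nested plain dicts so chained attribute access
            # (e.g. cfg.lr_config.policy) works.
            if isinstance(value, dict) and not isinstance(value, imixEasyDict):
                value = imixEasyDict(value)
            super().__setitem__(key, value)
            object.__setattr__(self, key, value)

        __setattr__ = __setitem__

    result = imixEasyDict()
    result['loss'] = 0.5       # key access ...
    assert result.loss == 0.5  # ... and attribute access see the same value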
Example #2
    def __init__(self, optimizer, *args, **kwargs):
        self._lambda_func = PythiaScheduler.lr_lambda_update

        # Stash the keyword arguments under a pseudo global config for the
        # update rule, then clear kwargs so they are not forwarded to the
        # base LambdaLR constructor.
        from imix.utils.config import imixEasyDict
        self._global_config = imixEasyDict({'lr_config': dict(kwargs)})
        kwargs = {}
        super().__init__(optimizer, self.lr_lambda, *args, **kwargs)
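The stored update rule is presumably invoked later through a small adapter; a hypothetical sketch of the assumed lr_lambda hook (not the actual imix code):

    # Hypothetical sketch of the assumed lr_lambda adapter (not the actual
    # imix implementation): LambdaLR calls it with the current step, and it
    # forwards to the stored Pythia update rule plus the stashed config.
    def lr_lambda(self, step):
        return self._lambda_func(step, self._global_config)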
Example #3
    def get_param() -> imixEasyDict:
        # Split the optimizer config into the constructor type, the
        # paramwise settings, and the remaining optimizer arguments.
        param = imixEasyDict()
        param.optimizer_cfg = copy.deepcopy(cfg)
        param.type = param.optimizer_cfg.pop('constructor',
                                             'DefaultOptimizerConstructor')
        param.paramwise_cfg = param.optimizer_cfg.pop('paramwise_cfg', None)
        return param
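A usage sketch, assuming cfg is a module-level optimizer config that get_param() closes over (the values here are hypothetical):

    # Hypothetical optimizer config; get_param() closes over `cfg`.
    cfg = imixEasyDict(type='SGD', lr=0.01,
                       paramwise_cfg=imixEasyDict(bias_lr_mult=2.0))

    param = get_param()
    # 'constructor' is absent from cfg, so the default type is used:
    #   param.type          -> 'DefaultOptimizerConstructor'
    #   param.paramwise_cfg -> {'bias_lr_mult': 2.0}
    #   param.optimizer_cfg -> {'type': 'SGD', 'lr': 0.01}
    # The pops operate on a deep copy, so cfg itself is untouched.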
Example #4
        def process(image_feature):
            image_info = imixEasyDict()
            image_loc, image_dim = image_feature.shape
            # Pad (or truncate) the per-region features to exactly
            # self.max_loc rows.
            tmp_image_feat = np.zeros((self.max_loc, image_dim),
                                      dtype=np.float32)
            tmp_image_feat[:image_loc, :] = image_feature[:self.max_loc, :]
            image_info.image_feature = torch.from_numpy(tmp_image_feat)
            # Number of rows that hold real (non-padding) features.
            image_info.max_features = torch.tensor(image_loc, dtype=torch.long)

            return image_info
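The padding logic can be exercised standalone; a sketch assuming self.max_loc = 100 (the value is hypothetical):

    import numpy as np
    import torch

    max_loc = 100  # stand-in for self.max_loc; the value is hypothetical

    # 36 regions with 2048-d features, as a typical detector output.
    image_feature = np.random.rand(36, 2048).astype(np.float32)

    image_loc, image_dim = image_feature.shape
    tmp_image_feat = np.zeros((max_loc, image_dim), dtype=np.float32)
    # Pad up to max_loc rows (or truncate when image_loc > max_loc).
    tmp_image_feat[:image_loc, :] = image_feature[:max_loc, :]

    assert torch.from_numpy(tmp_image_feat).shape == (max_loc, image_dim)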
Example #5
    def forward_train(self, examples, **kwargs):
        train_features = [
            convert_example_to_features(example, self.max_seq_length,
                                        self.tokenizer) for example in examples
        ]

        # Language Inputs
        input_ids = torch.tensor([f.input_ids for f in train_features],
                                 dtype=torch.long).cuda()
        input_mask = torch.tensor([f.input_mask for f in train_features],
                                  dtype=torch.long).cuda()
        segment_ids = torch.tensor([f.segment_ids for f in train_features],
                                   dtype=torch.long).cuda()

        # Visual Inputs
        feats = torch.from_numpy(
            np.stack([f.visual_feats[0] for f in train_features])).cuda()
        pos = torch.from_numpy(
            np.stack([f.visual_feats[1] for f in train_features])).cuda()

        # Language Prediction
        lm_labels = torch.tensor([f.lm_label_ids for f in train_features],
                                 dtype=torch.long).cuda()

        # Visual Prediction
        obj_labels = {}
        for key in ('obj', 'attr', 'feat'):
            visn_labels = torch.from_numpy(
                np.stack([f.obj_labels[key][0]
                          for f in train_features])).cuda()
            visn_mask = torch.from_numpy(
                np.stack([f.obj_labels[key][1]
                          for f in train_features])).cuda()
            assert visn_labels.size(0) == visn_mask.size(0)
            assert visn_labels.size(1) == visn_mask.size(1)
            obj_labels[key] = (visn_labels, visn_mask)

        # Joint Prediction
        matched_labels = torch.tensor([f.is_matched for f in train_features],
                                      dtype=torch.long).cuda()
        ans = torch.from_numpy(np.stack([f.ans
                                         for f in train_features])).cuda()

        loss, losses, answer_score_logit = self.model(input_ids, segment_ids,
                                                      input_mask, lm_labels,
                                                      feats, pos, obj_labels,
                                                      matched_labels, ans)

        output = imixEasyDict()
        output.loss = loss  # total loss
        output.losses = losses  # every loss
        output.scores = answer_score_logit

        return output
Example #6
    def _paramwise_cfg_params(self):
        paramwise = imixEasyDict()

        paramwise.bias_lr_mult = getattr(self.paramwise_cfg, 'bias_lr_mult', 1.0)
        paramwise.bias_decay_mult = getattr(self.paramwise_cfg, 'bias_decay_mult', 1.0)
        paramwise.norm_decay_mult = getattr(self.paramwise_cfg, 'norm_decay_mult', 1.0)
        paramwise.dwconv_decay_mult = getattr(self.paramwise_cfg, 'dwconv_decay_mult', 1.0)
        paramwise.bypass_duplicate = getattr(self.paramwise_cfg, 'bypass_duplicate', False)

        paramwise.custom_keys = getattr(self.paramwise_cfg, 'custom_keys', {})
        paramwise.sorted_keys = sorted(sorted(paramwise.custom_keys.keys()), key=len, reverse=True)

        return paramwise
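The double sort orders custom_keys alphabetically first and then, stably, by length in descending order, so that when keys are matched against parameter names the longest (most specific) key wins. A quick illustration:

    custom_keys = {'backbone': {}, 'backbone.layer0': {}, 'head': {}}
    sorted_keys = sorted(sorted(custom_keys.keys()), key=len, reverse=True)
    # ['backbone.layer0', 'backbone', 'head']: the more specific
    # 'backbone.layer0' is tried before its prefix 'backbone'.
    print(sorted_keys)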
Example #7
    def get_cfg_param(data_cfg):
        params = imixEasyDict()

        # batch_size and num_workers are required: getattr without a default
        # raises AttributeError if the field is missing from the config.
        params.batch_size = getattr(data_cfg, 'samples_per_gpu')
        params.num_workers = getattr(data_cfg, 'workers_per_gpu')
        params.drop_last = getattr(data_cfg, 'drop_last', False)
        params.pin_memory = getattr(data_cfg, 'pin_memory', False)
        params.sampler_cfg = getattr(data_cfg, 'sampler', None)
        params.batch_sampler_cfg = getattr(data_cfg, 'batch_sampler', None)
        params.shuffle = getattr(data_cfg, 'shuffle', False)
        params.collate_fn = getattr(data_cfg, 'collate_fn', None)
        params.worker_init_fn = worker_init_fn

        return params
Example #8
def build_data_loader_by_epoch(dataset, cfg, is_training=True):
    def get_cfg_param(data_cfg):
        params = imixEasyDict()

        params.batch_size = getattr(data_cfg, 'samples_per_gpu')
        params.num_workers = getattr(data_cfg, 'workers_per_gpu')
        params.drop_last = getattr(data_cfg, 'drop_last', False)
        params.pin_memory = getattr(data_cfg, 'pin_memory', False)
        params.sampler_cfg = getattr(data_cfg, 'sampler', None)
        params.batch_sampler_cfg = getattr(data_cfg, 'batch_sampler', None)
        params.shuffle = getattr(data_cfg, 'shuffle', False)
        params.collate_fn = getattr(data_cfg, 'collate_fn', None)
        params.worker_init_fn = worker_init_fn

        return params

    params = get_cfg_param(cfg.train_data if is_training else cfg.test_data)
    sampler_cfg, batch_sampler_cfg = params.sampler_cfg, params.batch_sampler_cfg

    dataloader_param = {
        'dataset': dataset,
        'pin_memory': params.pin_memory,
        'num_workers': params.num_workers,
        # Resolve the configured collate function by name via eval; the name
        # must be defined in this module's scope.
        'collate_fn': eval(params.collate_fn) if params.collate_fn else None,
    }

    # A batch sampler controls batching itself, so batch_size, shuffle and
    # drop_last are only set in the plain-sampler branch below.
    if batch_sampler_cfg:
        batch_sampler = build_batch_sampler(batch_sampler_cfg,
                                            default_args={'dataset': dataset})
        dataloader_param.update({'batch_sampler': batch_sampler})
    else:
        if sampler_cfg:
            sampler_cfg = imixEasyDict({'type': sampler_cfg}) if isinstance(
                sampler_cfg, str) else sampler_cfg
            sampler = build_sampler(sampler_cfg,
                                    default_args={'dataset': dataset})
            dataloader_param.update({'sampler': sampler})

        dataloader_param.update({
            'batch_size': params.batch_size,
            'drop_last': params.drop_last,
            'shuffle': params.shuffle,
        })

    return DataLoader(**dataloader_param)
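A usage sketch with a minimal, hypothetical config; the field names follow the getattr calls above, and my_dataset stands in for any map-style dataset:

    # Hypothetical minimal config for the training branch.
    cfg = imixEasyDict(
        train_data=imixEasyDict(
            samples_per_gpu=32,  # required: read without a getattr default
            workers_per_gpu=4,   # required: read without a getattr default
            shuffle=True,
            drop_last=True,
        ))

    loader = build_data_loader_by_epoch(my_dataset, cfg, is_training=True)
    for batch in loader:
        ...  # standard epoch-based iteration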
Example #9
    def add_ocr_info(self, item_feature: ItemFeature, sample: ItemFeature):
        sample_info = item_feature

        if not self.use_ocr:
            # remove all OCRs from the sample
            # (i.e. make an empty OCR list)
            sample_info['ocr_tokens'] = []
            sample_info['ocr_info'] = []
            if 'ocr_normalized_boxes' in sample_info:
                sample_info['ocr_normalized_boxes'] = np.zeros((0, 4),
                                                               np.float32)
            # clear OCR visual features
            if 'image_feature_1' in sample:
                sample.image_feature_1 = torch.zeros_like(
                    sample.image_feature_1)
            return sample

        # Preprocess OCR tokens
        if hasattr(self, 'ocr_token_processor'):
            ocr_tokens = [
                self.ocr_token_processor({'text': token})['text']
                for token in sample_info['ocr_tokens']
            ]
        else:
            ocr_tokens = sample_info['ocr_tokens']

        # Get FastText embeddings for OCR tokens
        context = self.context_processor({'tokens': ocr_tokens})
        sample.context = context['text']
        sample.ocr_tokens = context['tokens']

        sample.context_tokens = object_to_byte_tensor(context['tokens'])
        sample.context_feature_0 = context['text']
        sample.context_info_0 = imixEasyDict()
        sample.context_info_0.max_features = context['length']

        # Get PHOC embeddings for OCR tokens
        if hasattr(self, 'phoc_processor'):
            if self.phoc_processor is None:
                if item_feature.context_phoc is None:
                    phoc_file_name = f'{item_feature.set_name}_qid_{item_feature.question_id}.json'
                    context_phoc = self.get_phoc_feature(
                        file_name=phoc_file_name)
                else:
                    context_phoc = item_feature.context_phoc

                sample.context_feature_1 = torch.Tensor(context_phoc['text'])
                sample.context_info_1 = imixEasyDict()
                sample.context_info_1.max_features = torch.tensor(
                    context_phoc['length'])
            else:
                context_phoc = self.phoc_processor({'tokens': ocr_tokens})
                sample.context_feature_1 = context_phoc['text']
                sample.context_info_1 = imixEasyDict()
                sample.context_info_1.max_features = context_phoc['length']

        # OCR order vectors
        if self.cfg.get('use_order_vectors', False):
            order_vectors = np.eye(len(sample.ocr_tokens), dtype=np.float32)
            order_vectors = torch.from_numpy(order_vectors)
            order_vectors[context['length']:] = 0
            sample.order_vectors = order_vectors

        # OCR bounding box information
        if 'ocr_normalized_boxes' in sample_info and hasattr(
                self, 'copy_processor'):
            # New imdb format: OCR bounding boxes are already pre-computed
            max_len = self.cfg.answer_processor.config.max_length
            sample.ocr_bbox_coordinates = self.copy_processor(
                {'blob':
                 sample_info['ocr_normalized_boxes']})['blob'][:max_len]
        elif self.use_ocr_info and 'ocr_info' in sample_info:
            # Old imdb format: OCR bounding boxes are computed on-the-fly
            # from ocr_info
            sample.ocr_bbox_coordinates = self.bbox_processor(
                {'info': sample_info['ocr_info']})['bbox'].coordinates

        return sample
Example #10
    def clear_scalars(self):
        self._scalars = imixEasyDict()
Example #11
    def __init__(self):
        self._scalars = imixEasyDict()
        self._history = defaultdict(HistoryBuffer)
        self._smoothing_hints = imixEasyDict()
Example #12
# answer_table = json.load(open('/home/datasets/mix_data/lxmert/vqa/trainval_label2ans.json'))

model_root_path = openchat_path + '/model_pth/'
model_vqa_path = dict(
    lxmert=dict(
        model_weight=model_root_path + 'lxmert_vqa.pth',
        answer_table=dataset_root + 'lxmert/vqa/trainval_label2ans.json'),
    vilbert=dict(
        model_weight=model_root_path + 'vilbert_vqa.pth',
        answer_table=dataset_root + 'vilbert/datasets/VQA/cache/trainval_label2ans.json',
        token=dict(pretrained_model_name_or_path='bert-base-uncased', do_lower_case=True)),
    oscar=dict(
        model_weight=model_root_path + 'oscar_vqa.pth',
        answer_table=dataset_root + 'vilbert/datasets/VQA/cache/trainval_label2ans.json',
        token=dict(
            pretrained_model_name_or_path=dataset_root + 'model/oscar/base-vg-labels/ep_107_1192087',
            do_lower_case=True)),
    uniter=dict(
        model_weight=model_root_path + 'uniter_vqa.pth',
        answer_table=dataset_root + 'vilbert/datasets/VQA/cache/trainval_label2ans.json',
        token=dict(pretrained_model_name_or_path='bert-base-uncased', do_lower_case=True)),
    vinvl=dict(
        model_weight=model_root_path + 'vinvl_vqa.pth',
        answer_table=dataset_root + 'vilbert/datasets/VQA/cache/trainval_label2ans.json',
        token=dict(
            pretrained_model_name_or_path=dataset_root + 'model/oscar/base-vg-labels/ep_107_1192087',
            do_lower_case=True)),
)

model_vqa_path = imixEasyDict(model_vqa_path)
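Assuming imixEasyDict converts nested dicts recursively (as the easydict package it mirrors does), entries can then be read with chained attribute access:

    # Chained attribute access into the nested config (assumes recursive
    # conversion of the inner dicts).
    lxmert_cfg = model_vqa_path.lxmert
    print(lxmert_cfg.model_weight)  # .../model_pth/lxmert_vqa.pth
    print(lxmert_cfg.answer_table)  # .../lxmert/vqa/trainval_label2ans.json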
Example #13
    def forward(self,
                input_ids,
                token_type_ids=None,
                attention_mask=None,
                masked_lm_labels=None,
                visual_feats=None,
                pos=None,
                obj_labels=None,
                matched_label=None,
                ans=None):
        (lang_output, visn_output), pooled_output = self.bert(
            input_ids,
            token_type_ids,
            attention_mask,
            visual_feats=(visual_feats, pos),
        )

        lang_prediction_scores, cross_relationship_score = self.cls(
            lang_output, pooled_output)
        if self.task_qa:
            answer_score = self.answer_head(pooled_output)
        else:
            # This answer_score is not used anywhere; it only keeps the
            # return signature constant.
            answer_score = pooled_output[0][0]

        total_loss = 0.
        loss_fct = CrossEntropyLoss(ignore_index=-1)
        losses = imixEasyDict()
        if masked_lm_labels is not None and self.task_mask_lm:
            masked_lm_loss = loss_fct(
                lang_prediction_scores.view(-1, self.config.vocab_size),
                masked_lm_labels.view(-1))
            total_loss += masked_lm_loss
            # losses += (masked_lm_loss.detach(),)
            losses.masked_lm_loss = masked_lm_loss
        if matched_label is not None and self.task_matched:
            matched_loss = loss_fct(cross_relationship_score.view(-1, 2),
                                    matched_label.view(-1))
            total_loss += matched_loss
            # losses += (matched_loss.detach(),)
            losses.matched_loss = matched_loss
        if obj_labels is not None and self.task_obj_predict:
            loss_fcts = {
                'l2': SmoothL1Loss(reduction='none'),
                'ce': CrossEntropyLoss(ignore_index=-1, reduction='none')
            }
            total_visn_loss = 0.
            visn_prediction_scores_dict = self.obj_predict_head(visn_output)
            for key in VISUAL_CONFIG.visual_losses:
                label, mask_conf = obj_labels[key]
                output_dim, loss_fct_name, label_shape, weight = VISUAL_CONFIG.visual_loss_config[
                    key]
                visn_loss_fct = loss_fcts[loss_fct_name]
                visn_prediction_scores = visn_prediction_scores_dict[key]
                visn_loss = visn_loss_fct(
                    visn_prediction_scores.view(-1, output_dim),
                    label.view(*label_shape),
                )
                if visn_loss.dim() > 1:  # Regression Losses
                    visn_loss = visn_loss.mean(1)
                visn_loss = (visn_loss * mask_conf.view(-1)).mean() * weight
                total_visn_loss += visn_loss
                # losses += (visn_loss.detach(),)
                losses[f'{key}_visn_loss'] = visn_loss
            total_loss += total_visn_loss
        if ans is not None and self.task_qa:
            answer_loss = loss_fct(answer_score.view(-1, self.num_answers),
                                   ans.view(-1))
            # Since this GitHub version pre-trains with the QA loss from the
            # beginning, the "*2" multiplier is dropped to match the overall
            # effect of the QA loss.
            # Previous: (loss * 0) for 6 epochs, then (loss * 2) for 6 epochs
            #           (the EMNLP paper used 10 instead of 6).
            # Now:      (loss * 1) for 12 epochs.
            # The original "* 2" compensated for more than half of the data
            # having no label.
            total_loss += answer_loss
            # losses += (answer_loss.detach(),)
            losses.answer_loss = answer_loss

        return total_loss, losses, answer_score.detach()