示例#1
0
文件: data.py 项目: ictnlp/BoN-NAT
    def prepare_train_data(self, dataPath, annFile, batch_size, max_len=None, size=None):
        """Build the caption dataset and its batch sampler for training.

        As a side effect, ``self.max_dataset_length`` is set to the largest
        number of single-space-separated pieces found in any entry of the
        first sequence returned by ``process_json`` (0 if it is empty).

        Args:
            dataPath: Forwarded unchanged to ``process_json``.
            annFile: Forwarded unchanged to ``process_json``.
            batch_size: Batch size given to ``BatchSamplerCaptionsSameLength``.
            max_len: Forwarded to ``process_json`` as ``max_len``.
            size: Forwarded to ``process_json`` as ``size``.

        Returns:
            A ``(dataset, sampler)`` tuple holding the
            ``CocoCaptionsIndexedCaption`` instance and the
            ``BatchSamplerCaptionsSameLength`` built on top of it.
        """
        parsed = process_json(dataPath, annFile, max_len=max_len, size=size)
        bpes, features_path, bpe2img, img2bpes = parsed

        # Longest entry in the dataset, counted in pieces separated by a
        # single space. split(' ') is kept on purpose: unlike split(), it
        # counts the empty strings produced by consecutive spaces.
        self.max_dataset_length = max(
            (len(entry.split(' ')) for entry in bpes),
            default=0,
        )

        caption_dataset = CocoCaptionsIndexedCaption(
            bpes, features_path, bpe2img, img2bpes)
        caption_sampler = BatchSamplerCaptionsSameLength(
            caption_dataset, batch_size=batch_size)
        return caption_dataset, caption_sampler
示例#2
0
    def prepare_distill_data(self,
                             dataPath,
                             annFile,
                             batch_size,
                             max_len=None,
                             size=None):
        """Build the image-indexed distillation dataset and its batch sampler.

        Args:
            dataPath: Forwarded unchanged to ``process_json``.
            annFile: Forwarded unchanged to ``process_json``.
            batch_size: Batch size given to ``BatchSamplerImagesSameLength``.
            max_len: Forwarded to ``process_json`` as ``max_len``.
            size: Forwarded to ``process_json`` as ``size``.

        Returns:
            A ``(dataset, sampler)`` tuple holding the
            ``CocoCaptionsIndexedImageDistill`` instance and the
            ``BatchSamplerImagesSameLength`` built on top of it.
        """
        parsed = process_json(dataPath, annFile, max_len=max_len, size=size)
        # Explicit four-way unpacking so an unexpected result shape fails here.
        bpes, features_path, bpe2img, img2bpes = parsed

        distill_dataset = CocoCaptionsIndexedImageDistill(
            bpes, features_path, bpe2img, img2bpes)
        distill_sampler = BatchSamplerImagesSameLength(
            distill_dataset, batch_size=batch_size)
        return distill_dataset, distill_sampler