示例#1
0
    def __init__(self,
                 image_path=None,
                 label_path=None,
                 chw_format=True,
                 mode='train',
                 transform=None,
                 download=True):
        """Resolve the image/label files for the requested split and load them.

        Args:
            image_path (str, optional): Path of the image file. When None, the
                path is obtained from ``_check_exists_and_download``.
            label_path (str, optional): Path of the label file. When None, the
                path is obtained from ``_check_exists_and_download``.
            chw_format (bool): Stored as ``self.chw_format``; not otherwise
                used in this method.
            mode (str): ``'train'`` or ``'test'`` (case-insensitive).
            transform: Stored as ``self.transform``; not otherwise used here.
            download (bool): Must be true whenever a path is left as None.

        Raises:
            AssertionError: If ``mode`` is not a supported value, or if a path
                is None while ``download`` is false.
        """
        assert mode.lower() in ['train', 'test'], \
                "mode should be 'train' or 'test', but got {}".format(mode)
        self.mode = mode.lower()
        self.chw_format = chw_format
        self.image_path = image_path
        if self.image_path is None:
            assert download, "image_path is not set and downloading automatically is disabled"
            # Select on the normalised self.mode, exactly like the label
            # branch below. Comparing the raw argument would make a value
            # such as 'Train' pass the assert yet pick the TEST images.
            image_url = TRAIN_IMAGE_URL if self.mode == 'train' else TEST_IMAGE_URL
            image_md5 = TRAIN_IMAGE_MD5 if self.mode == 'train' else TEST_IMAGE_MD5
            self.image_path = _check_exists_and_download(
                image_path, image_url, image_md5, 'mnist', download)

        self.label_path = label_path
        if self.label_path is None:
            assert download, "label_path is not set and downloading automatically is disabled"
            label_url = TRAIN_LABEL_URL if self.mode == 'train' else TEST_LABEL_URL
            label_md5 = TRAIN_LABEL_MD5 if self.mode == 'train' else TEST_LABEL_MD5
            self.label_path = _check_exists_and_download(
                label_path, label_url, label_md5, 'mnist', download)

        self.transform = transform

        # read dataset into memory
        self._parse_dataset()
示例#2
0
    def __init__(self,
                 data_file=None,
                 label_file=None,
                 setid_file=None,
                 mode='train',
                 transform=None,
                 download=True):
        """Resolve the three dataset files for the given split and load annotations.

        Args:
            data_file (str, optional): Path of the data file; resolved through
                ``_check_exists_and_download`` when None.
            label_file (str, optional): Path of the label file; resolved the
                same way when None.
            setid_file (str, optional): Path of the set-id file; resolved the
                same way when None.
            mode (str): ``'train'``, ``'valid'`` or ``'test'``
                (case-insensitive); mapped to ``self.flag`` via
                ``MODE_FLAG_MAP``.
            transform: Stored as ``self.transform``; not otherwise used here.
            download (bool): Must be true whenever a path is left as None.

        Raises:
            AssertionError: On an unsupported ``mode``, or when a path is None
                while ``download`` is false.
        """
        split = mode.lower()
        assert split in ['train', 'valid', 'test'], \
            "mode should be 'train', 'valid' or 'test', but got {}".format(mode)
        self.flag = MODE_FLAG_MAP[split]

        # Each file follows the same rule: keep the caller's path when given,
        # otherwise require `download` and let the helper supply the path.
        self.data_file = data_file
        if data_file is None:
            assert download, "data_file is not set and downloading automatically is disabled"
            self.data_file = _check_exists_and_download(
                None, DATA_URL, DATA_MD5, 'flowers', download)

        self.label_file = label_file
        if label_file is None:
            assert download, "label_file is not set and downloading automatically is disabled"
            self.label_file = _check_exists_and_download(
                None, LABEL_URL, LABEL_MD5, 'flowers', download)

        self.setid_file = setid_file
        if setid_file is None:
            assert download, "setid_file is not set and downloading automatically is disabled"
            self.setid_file = _check_exists_and_download(
                None, SETID_URL, SETID_MD5, 'flowers', download)

        self.transform = transform

        # Load the annotations now that all three paths are resolved.
        self._load_anno()
示例#3
0
    def func_test_errors(self):
        """Check the error paths of the folder datasets and the download helper."""
        # Both folder dataset classes are expected to raise RuntimeError when
        # built on self.empty_dir (presumably an empty directory prepared by
        # the test fixture -- confirm against setUp).
        for folder_cls in (ImageFolder, DatasetFolder):
            with self.assertRaises(RuntimeError):
                folder_cls(self.empty_dir)

        # With download disabled, the helper is expected to raise ValueError
        # for the path 'temp_paddle'.
        with self.assertRaises(ValueError):
            _check_exists_and_download('temp_paddle', None, None, None, False)
示例#4
0
    def __init__(self,
                 data_file=None,
                 mode='train',
                 src_dict_size=-1,
                 trg_dict_size=-1,
                 lang='en',
                 download=True):
        """Resolve the data file, build both word dictionaries and load the data.

        Args:
            data_file (str, optional): Path of the data file; resolved through
                ``_check_exists_and_download`` when None.
            mode (str): ``'train'``, ``'test'`` or ``'val'`` (case-insensitive).
            src_dict_size (int): Requested source dictionary size; must be
                positive, so the default of -1 has to be overridden.
            trg_dict_size (int): Requested target dictionary size; must be
                positive, so the default of -1 has to be overridden.
            lang (str): Source language. ``"en"`` makes ``"de"`` the target;
                any other value makes ``"en"`` the target.
            download (bool): Must be true when ``data_file`` is None.

        Raises:
            AssertionError: On an unsupported ``mode``, a non-positive dict
                size, or a missing ``data_file`` with ``download`` false.
        """
        normalized_mode = mode.lower()
        assert normalized_mode in ['train', 'test', 'val'], \
            "mode should be 'train', 'test' or 'val', but got {}".format(mode)
        self.mode = normalized_mode

        self.data_file = data_file
        if data_file is None:
            assert download, "data_file is not set and downloading automatically is disabled"
            self.data_file = _check_exists_and_download(
                None, DATA_URL, DATA_MD5, 'wmt16', download)

        self.lang = lang
        assert src_dict_size > 0, "dict_size should be set as positive number"
        assert trg_dict_size > 0, "dict_size should be set as positive number"

        # Pick the vocabulary limits and the target language in one place.
        if lang == "en":
            src_limit, trg_limit, trg_lang = TOTAL_EN_WORDS, TOTAL_DE_WORDS, "de"
        else:
            src_limit, trg_limit, trg_lang = TOTAL_DE_WORDS, TOTAL_EN_WORDS, "en"

        # The stored sizes are capped at the per-language totals.
        self.src_dict_size = min(src_dict_size, src_limit)
        self.trg_dict_size = min(trg_dict_size, trg_limit)

        # load source and target word dict
        # NOTE(review): the dictionaries are built from the caller's sizes,
        # not the capped ones stored above -- confirm this is intended.
        self.src_dict = self._load_dict(lang, src_dict_size)
        self.trg_dict = self._load_dict(trg_lang, trg_dict_size)

        # load data
        self.data = self._load_data()
示例#5
0
    def __init__(self,
                 data_file=None,
                 data_type='NGRAM',
                 window_size=-1,
                 mode='train',
                 min_word_freq=50,
                 download=True):
        """Resolve the corpus file, build the word dictionary and load the data.

        Args:
            data_file (str, optional): Path of the corpus file; resolved
                through ``_check_exists_and_download`` when None.
            data_type (str): ``'NGRAM'`` or ``'SEQ'`` (case-insensitive).
            window_size (int): Stored as ``self.window_size``; not otherwise
                used in this method.
            mode (str): ``'train'`` or ``'test'`` (case-insensitive).
            min_word_freq (int): Stored as ``self.min_word_freq`` and passed
                to ``_build_work_dict``.
            download (bool): Must be true when ``data_file`` is None.

        Raises:
            AssertionError: On an unsupported ``data_type`` or ``mode``, or a
                missing ``data_file`` with ``download`` false.
        """
        kind = data_type.upper()
        assert kind in ['NGRAM', 'SEQ'], \
            "data type should be 'NGRAM', 'SEQ', but got {}".format(data_type)
        self.data_type = kind

        split = mode.lower()
        assert split in ['train', 'test'], \
            "mode should be 'train', 'test', but got {}".format(mode)
        self.mode = split

        self.window_size = window_size
        self.min_word_freq = min_word_freq

        self.data_file = data_file
        if data_file is None:
            assert download, "data_file is not set and downloading automatically disabled"
            self.data_file = _check_exists_and_download(
                None, URL, MD5, 'imikolov', download)

        # The dictionary is built before the annotations are loaded.
        self.word_idx = self._build_work_dict(min_word_freq)

        # read dataset into memory
        self._load_anno()
示例#6
0
    def __init__(self,
                 data_file=None,
                 mode='train',
                 transform=None,
                 download=True,
                 backend=None):
        """Validate the arguments, resolve the data file and load annotations.

        Args:
            data_file (str, optional): Path of the data file; resolved through
                ``_check_exists_and_download`` when None.
            mode (str): ``'train'``, ``'valid'`` or ``'test'``
                (case-insensitive); mapped to ``self.flag`` via
                ``MODE_FLAG_MAP``.
            transform: Stored as ``self.transform``; not otherwise used here.
            download (bool): Must be true when ``data_file`` is None.
            backend (str, optional): ``'pil'`` or ``'cv2'``. When None, the
                value of ``paddle.vision.get_image_backend()`` is used.

        Raises:
            AssertionError: On an unsupported ``mode``, or a missing
                ``data_file`` with ``download`` false.
            ValueError: If the effective backend is neither ``'pil'`` nor
                ``'cv2'``.
        """
        split = mode.lower()
        assert split in ['train', 'valid', 'test'], \
            "mode should be 'train', 'valid' or 'test', but got {}".format(mode)

        # Fall back to the globally configured backend when none is given.
        chosen_backend = (paddle.vision.get_image_backend()
                          if backend is None else backend)
        if chosen_backend not in ['pil', 'cv2']:
            raise ValueError(
                "Expected backend are one of ['pil', 'cv2'], but got {}"
                .format(chosen_backend))
        self.backend = chosen_backend

        self.flag = MODE_FLAG_MAP[split]

        self.data_file = data_file
        if data_file is None:
            assert download, "data_file is not set and downloading automatically is disabled"
            self.data_file = _check_exists_and_download(
                None, VOC_URL, VOC_MD5, CACHE_DIR, download)
        self.transform = transform

        # read dataset into memory
        self._load_anno()

        # Record the default dtype at construction time.
        self.dtype = paddle.get_default_dtype()
示例#7
0
    def __init__(self,
                 data_file=None,
                 mode='train',
                 transform=None,
                 download=True,
                 backend=None):
        """Validate the arguments, resolve the data file and load the data.

        Args:
            data_file (str, optional): Path of the data file; resolved through
                ``_check_exists_and_download`` when None.
            mode (str): ``'train'`` or ``'test'`` (case-insensitive).
            transform: Stored as ``self.transform``; not otherwise used here.
            download (bool): Must be true when ``data_file`` is None.
            backend (str, optional): ``'pil'`` or ``'cv2'``. When None, the
                value of ``paddle.vision.get_image_backend()`` is used.

        Raises:
            AssertionError: On an unsupported ``mode``, or a missing
                ``data_file`` with ``download`` false.
            ValueError: If the effective backend is neither ``'pil'`` nor
                ``'cv2'``.
        """
        # Only 'train' and 'test' were ever accepted; the message now says so
        # instead of advertising 'train10'/'test100' style values that this
        # very check rejects.
        assert mode.lower() in ['train', 'test'], \
            "mode should be 'train' or 'test', but got {}".format(mode)
        self.mode = mode.lower()

        if backend is None:
            backend = paddle.vision.get_image_backend()
        if backend not in ['pil', 'cv2']:
            raise ValueError(
                "Expected backend are one of ['pil', 'cv2'], but got {}".
                format(backend))
        self.backend = backend

        # Presumably sets self.data_url / self.data_md5, which are read below
        # and assigned nowhere else in this method -- confirm in the helper.
        self._init_url_md5_flag()

        self.data_file = data_file
        if self.data_file is None:
            assert download, "data_file is not set and downloading automatically is disabled"
            self.data_file = _check_exists_and_download(
                data_file, self.data_url, self.data_md5, 'cifar', download)

        self.transform = transform

        # read dataset into memory
        self._load_data()

        self.dtype = paddle.get_default_dtype()
示例#8
0
    def __init__(self,
                 data_file=None,
                 label_file=None,
                 setid_file=None,
                 mode='train',
                 transform=None,
                 download=True,
                 backend=None):
        """Resolve the dataset files, unpack the archive and read labels/indexes.

        Args:
            data_file (str, optional): Path of the data archive. When falsy
                (None or empty), the path is obtained from
                ``_check_exists_and_download``.
            label_file (str, optional): Path of the label ``.mat`` file;
                resolved the same way when falsy.
            setid_file (str, optional): Path of the set-id ``.mat`` file;
                resolved the same way when falsy.
            mode (str): ``'train'``, ``'valid'`` or ``'test'``
                (case-insensitive); mapped through ``MODE_FLAG_MAP`` to the
                key read from the set-id file.
            transform: Stored as ``self.transform``; not otherwise used here.
            download (bool): Must be true whenever a path is falsy.
            backend (str, optional): ``'pil'`` or ``'cv2'``. When None, the
                value of ``paddle.vision.get_image_backend()`` is used.

        Raises:
            AssertionError: On an unsupported ``mode``, or a missing path with
                ``download`` false.
            ValueError: If the effective backend is neither ``'pil'`` nor
                ``'cv2'``.
        """
        assert mode.lower() in ['train', 'valid', 'test'], \
                "mode should be 'train', 'valid' or 'test', but got {}".format(mode)

        if backend is None:
            backend = paddle.vision.get_image_backend()
        if backend not in ['pil', 'cv2']:
            raise ValueError(
                "Expected backend are one of ['pil', 'cv2'], but got {}"
                .format(backend))
        self.backend = backend

        flag = MODE_FLAG_MAP[mode.lower()]

        if not data_file:
            assert download, "data_file is not set and downloading automatically is disabled"
            data_file = _check_exists_and_download(
                data_file, DATA_URL, DATA_MD5, 'flowers', download)

        if not label_file:
            assert download, "label_file is not set and downloading automatically is disabled"
            label_file = _check_exists_and_download(
                label_file, LABEL_URL, LABEL_MD5, 'flowers', download)

        if not setid_file:
            assert download, "setid_file is not set and downloading automatically is disabled"
            setid_file = _check_exists_and_download(
                setid_file, SETID_URL, SETID_MD5, 'flowers', download)

        self.transform = transform

        # Open the archive in a context manager so its file handle is closed
        # even when directory creation or extraction raises; previously the
        # handle was never closed.
        with tarfile.open(data_file) as data_tar:
            # The extraction directory sits next to the archive: ".tgz" in
            # the path is replaced by "/".
            self.data_path = data_file.replace(".tgz", "/")
            if not os.path.exists(self.data_path):
                os.mkdir(self.data_path)
            # NOTE(review): extractall trusts the member paths stored in the
            # archive, and a caller-supplied data_file is not verified in this
            # method -- only pass archives from a trusted source.
            data_tar.extractall(self.data_path)

        scio = try_import('scipy.io')
        self.labels = scio.loadmat(label_file)['labels'][0]
        self.indexes = scio.loadmat(setid_file)[flag][0]
示例#9
0
    def __init__(self,
                 data_file=None,
                 word_dict_file=None,
                 verb_dict_file=None,
                 target_dict_file=None,
                 emb_file=None,
                 download=True):
        """Resolve the five resource files, load the dictionaries and the data.

        Args:
            data_file (str, optional): Path of the data file.
            word_dict_file (str, optional): Path of the word dictionary file.
            verb_dict_file (str, optional): Path of the verb dictionary file.
            target_dict_file (str, optional): Path of the target (label)
                dictionary file.
            emb_file (str, optional): Path of the embedding file.
            download (bool): Must be true whenever any path is left as None.

        Every path that is None is resolved through
        ``_check_exists_and_download``.

        Raises:
            AssertionError: If any path is None while ``download`` is false.
        """
        # (attribute name, caller-supplied path, url, md5, assert message)
        resources = (
            ('data_file', data_file, DATA_URL, DATA_MD5,
             "data_file is not set and downloading automatically is disabled"),
            ('word_dict_file', word_dict_file, WORDDICT_URL, WORDDICT_MD5,
             "word_dict_file is not set and downloading automatically is disabled"),
            ('verb_dict_file', verb_dict_file, VERBDICT_URL, VERBDICT_MD5,
             "verb_dict_file is not set and downloading automatically is disabled"),
            ('target_dict_file', target_dict_file, TRGDICT_URL, TRGDICT_MD5,
             "target_dict_file is not set and downloading automatically is disabled"),
            ('emb_file', emb_file, EMB_URL, EMB_MD5,
             "emb_file is not set and downloading automatically is disabled"),
        )
        for attr_name, given_path, url, md5, missing_msg in resources:
            # Store the caller's value first, then replace it with the
            # helper's result when nothing was supplied.
            setattr(self, attr_name, given_path)
            if given_path is None:
                assert download, missing_msg
                resolved = _check_exists_and_download(
                    given_path, url, md5, 'conll05st', download)
                setattr(self, attr_name, resolved)

        self.word_dict = self._load_dict(self.word_dict_file)
        self.predicate_dict = self._load_dict(self.verb_dict_file)
        self.label_dict = self._load_label_dict(self.target_dict_file)

        # read dataset into memory
        self._load_anno()
示例#10
0
    def __init__(self,
                 image_path=None,
                 label_path=None,
                 mode='train',
                 transform=None,
                 download=True,
                 backend=None):
        """Validate the arguments, resolve the image/label files and load them.

        Args:
            image_path (str, optional): Path of the image file. When None, the
                path is obtained from ``_check_exists_and_download``.
            label_path (str, optional): Path of the label file. When None, the
                path is obtained from ``_check_exists_and_download``.
            mode (str): ``'train'`` or ``'test'`` (case-insensitive).
            transform: Stored as ``self.transform``; not otherwise used here.
            download (bool): Must be true whenever a path is left as None.
            backend (str, optional): ``'pil'`` or ``'cv2'``. When None, the
                value of ``paddle.vision.get_image_backend()`` is used.

        Raises:
            AssertionError: On an unsupported ``mode``, or a missing path with
                ``download`` false.
            ValueError: If the effective backend is neither ``'pil'`` nor
                ``'cv2'``.
        """
        assert mode.lower() in ['train', 'test'], \
                "mode should be 'train' or 'test', but got {}".format(mode)

        if backend is None:
            backend = paddle.vision.get_image_backend()
        if backend not in ['pil', 'cv2']:
            raise ValueError(
                "Expected backend are one of ['pil', 'cv2'], but got {}".
                format(backend))
        self.backend = backend

        self.mode = mode.lower()
        self.image_path = image_path
        if self.image_path is None:
            assert download, "image_path is not set and downloading automatically is disabled"
            # Select on the normalised self.mode, exactly like the label
            # branch below. Comparing the raw argument would make a value
            # such as 'Train' pass the assert yet pick the TEST images.
            image_url = self.TRAIN_IMAGE_URL if self.mode == 'train' else self.TEST_IMAGE_URL
            image_md5 = self.TRAIN_IMAGE_MD5 if self.mode == 'train' else self.TEST_IMAGE_MD5
            self.image_path = _check_exists_and_download(
                image_path, image_url, image_md5, self.NAME, download)

        self.label_path = label_path
        if self.label_path is None:
            assert download, "label_path is not set and downloading automatically is disabled"
            label_url = self.TRAIN_LABEL_URL if self.mode == 'train' else self.TEST_LABEL_URL
            label_md5 = self.TRAIN_LABEL_MD5 if self.mode == 'train' else self.TEST_LABEL_MD5
            self.label_path = _check_exists_and_download(
                label_path, label_url, label_md5, self.NAME, download)

        self.transform = transform

        # read dataset into memory
        self._parse_dataset()

        self.dtype = paddle.get_default_dtype()
示例#11
0
    def __init__(self, data_file=None, mode='train', download=True):
        """Resolve the data file for the given split and load the data.

        Args:
            data_file (str, optional): Path of the data file; resolved through
                ``_check_exists_and_download`` when None.
            mode (str): ``'train'`` or ``'test'`` (case-insensitive).
            download (bool): Must be true when ``data_file`` is None.

        Raises:
            AssertionError: On an unsupported ``mode``, or a missing
                ``data_file`` with ``download`` false.
        """
        split = mode.lower()
        assert split in ['train', 'test'], \
            "mode should be 'train' or 'test', but got {}".format(mode)
        self.mode = split

        self.data_file = data_file
        if data_file is None:
            assert download, "data_file is not set and downloading automatically is disabled"
            self.data_file = _check_exists_and_download(
                None, URL, MD5, 'uci_housing', download)

        # Load the data now that the path is resolved.
        self._load_data()
示例#12
0
    def __init__(self, data_file=None, mode='train', cutoff=150, download=True):
        """Resolve the corpus file, build the word dictionary and load the data.

        Args:
            data_file (str, optional): Path of the corpus file; resolved
                through ``_check_exists_and_download`` when None.
            mode (str): ``'train'`` or ``'test'`` (case-insensitive).
            cutoff (int): Passed unchanged to ``_build_work_dict``.
            download (bool): Must be true when ``data_file`` is None.

        Raises:
            AssertionError: On an unsupported ``mode``, or a missing
                ``data_file`` with ``download`` false.
        """
        split = mode.lower()
        assert split in ['train', 'test'], \
            "mode should be 'train', 'test', but got {}".format(mode)
        self.mode = split

        self.data_file = data_file
        if data_file is None:
            assert download, "data_file is not set and downloading automatically is disabled"
            self.data_file = _check_exists_and_download(
                None, URL, MD5, 'imdb', download)

        # The dictionary is built before the annotations are loaded.
        self.word_idx = self._build_work_dict(cutoff)

        # read dataset into memory
        self._load_anno()
示例#13
0
    def __init__(self,
                 data_file=None,
                 mode='train',
                 transform=None,
                 download=True):
        """Resolve the data file for the given split and load annotations.

        Args:
            data_file (str, optional): Path of the data file; resolved through
                ``_check_exists_and_download`` when None.
            mode (str): ``'train'``, ``'valid'`` or ``'test'``
                (case-insensitive); mapped to ``self.flag`` via
                ``MODE_FLAG_MAP``.
            transform: Stored as ``self.transform``; not otherwise used here.
            download (bool): Must be true when ``data_file`` is None.

        Raises:
            AssertionError: On an unsupported ``mode``, or a missing
                ``data_file`` with ``download`` false.
        """
        split = mode.lower()
        assert split in ['train', 'valid', 'test'], \
            "mode should be 'train', 'valid' or 'test', but got {}".format(mode)
        self.flag = MODE_FLAG_MAP[split]

        self.data_file = data_file
        if data_file is None:
            assert download, "data_file is not set and downloading automatically is disabled"
            self.data_file = _check_exists_and_download(
                None, VOC_URL, VOC_MD5, CACHE_DIR, download)
        self.transform = transform

        # Load the annotations now that the path is resolved.
        self._load_anno()
示例#14
0
    def __init__(self,
                 data_file=None,
                 mode='train',
                 dict_size=-1,
                 download=True):
        """Resolve the data file, validate the dictionary size and load the data.

        Args:
            data_file (str, optional): Path of the data file; resolved through
                ``_check_exists_and_download`` when None.
            mode (str): ``'train'``, ``'test'`` or ``'gen'``
                (case-insensitive).
            dict_size (int): Must be positive, so the default of -1 has to be
                overridden; stored as ``self.dict_size``.
            download (bool): Must be true when ``data_file`` is None.

        Raises:
            AssertionError: On an unsupported ``mode``, a non-positive
                ``dict_size``, or a missing ``data_file`` with ``download``
                false.
        """
        split = mode.lower()
        assert split in ['train', 'test', 'gen'], \
            "mode should be 'train', 'test' or 'gen', but got {}".format(mode)
        self.mode = split

        self.data_file = data_file
        if data_file is None:
            assert download, "data_file is not set and downloading automatically is disabled"
            # The same URL/MD5 pair is used for every mode.
            self.data_file = _check_exists_and_download(
                None, URL_TRAIN, MD5_TRAIN, 'wmt14', download)

        # The size is checked only after the data file has been resolved.
        assert dict_size > 0, "dict_size should be set as positive number"
        self.dict_size = dict_size
        self._load_data()
示例#15
0
    def __init__(self,
                 data_file=None,
                 mode='train',
                 transform=None,
                 download=True):
        """Resolve the data file for the given split and load the data.

        Args:
            data_file (str, optional): Path of the data file; resolved through
                ``_check_exists_and_download`` when None.
            mode (str): ``'train'`` or ``'test'`` (case-insensitive).
            transform: Stored as ``self.transform``; not otherwise used here.
            download (bool): Must be true when ``data_file`` is None.

        Raises:
            AssertionError: On an unsupported ``mode``, or a missing
                ``data_file`` with ``download`` false.
        """
        # Only 'train' and 'test' were ever accepted; the message now says so
        # instead of advertising 'train10'/'test100' style values that this
        # very check rejects.
        assert mode.lower() in ['train', 'test'], \
            "mode should be 'train' or 'test', but got {}".format(mode)
        self.mode = mode.lower()

        # Presumably sets self.data_url / self.data_md5, which are read below
        # and assigned nowhere else in this method -- confirm in the helper.
        self._init_url_md5_flag()

        self.data_file = data_file
        if self.data_file is None:
            assert download, "data_file is not set and downloading automatically is disabled"
            self.data_file = _check_exists_and_download(
                data_file, self.data_url, self.data_md5, 'cifar', download)

        self.transform = transform

        # read dataset into memory
        self._load_data()
示例#16
0
    def __init__(self,
                 data_file=None,
                 mode='train',
                 test_ratio=0.1,
                 rand_seed=0,
                 download=True):
        """Resolve the data file, seed NumPy's RNG and load metadata and data.

        Args:
            data_file (str, optional): Path of the data file; resolved through
                ``_check_exists_and_download`` when None.
            mode (str): ``'train'`` or ``'test'`` (case-insensitive).
            test_ratio (float): Stored as ``self.test_ratio``; not otherwise
                used in this method.
            rand_seed (int): Stored as ``self.rand_seed`` and passed to
                ``np.random.seed``.
            download (bool): Must be true when ``data_file`` is None.

        Raises:
            AssertionError: On an unsupported ``mode``, or a missing
                ``data_file`` with ``download`` false.
        """
        split = mode.lower()
        assert split in ['train', 'test'], \
            "mode should be 'train', 'test', but got {}".format(mode)
        self.mode = split

        self.data_file = data_file
        if data_file is None:
            assert download, "data_file is not set and downloading automatically is disabled"
            self.data_file = _check_exists_and_download(
                None, URL, MD5, 'sentiment', download)

        self.test_ratio = test_ratio
        self.rand_seed = rand_seed

        # Seeds NumPy's global RNG (a process-wide side effect) before the
        # loaders run.
        np.random.seed(rand_seed)
        self._load_meta_info()
        self._load_data()