def __init__(self,
             image_path=None,
             label_path=None,
             chw_format=True,
             mode='train',
             transform=None,
             download=True):
    """Validate the arguments, resolve the image/label files and parse them.

    Args:
        image_path: Path of the image file. When ``None`` the path is
            obtained from ``_check_exists_and_download`` using the train
            or test URL/MD5 constants selected by ``mode``.
        label_path: Path of the label file; resolved like ``image_path``.
        chw_format: Stored unchanged on ``self.chw_format``.
        mode: ``'train'`` or ``'test'``, compared case-insensitively.
        transform: Stored unchanged on ``self.transform``.
        download: Must be truthy whenever a path is left as ``None``.

    Raises:
        AssertionError: If ``mode`` is not accepted, or a path is ``None``
            while ``download`` is falsy.
    """
    assert mode.lower() in ['train', 'test'], \
        "mode should be 'train' or 'test', but got {}".format(mode)
    self.mode = mode.lower()
    self.chw_format = chw_format

    self.image_path = image_path
    if self.image_path is None:
        assert download, "image_path is not set and downloading automatically is disabled"
        # Select on the normalised mode. The raw argument may be spelled
        # e.g. 'Train'; comparing it directly would pick the test images
        # while the labels below are picked from the train set.
        image_url = TRAIN_IMAGE_URL if self.mode == 'train' else TEST_IMAGE_URL
        image_md5 = TRAIN_IMAGE_MD5 if self.mode == 'train' else TEST_IMAGE_MD5
        self.image_path = _check_exists_and_download(
            image_path, image_url, image_md5, 'mnist', download)

    self.label_path = label_path
    if self.label_path is None:
        assert download, "label_path is not set and downloading automatically is disabled"
        label_url = TRAIN_LABEL_URL if self.mode == 'train' else TEST_LABEL_URL
        label_md5 = TRAIN_LABEL_MD5 if self.mode == 'train' else TEST_LABEL_MD5
        self.label_path = _check_exists_and_download(
            label_path, label_url, label_md5, 'mnist', download)

    self.transform = transform

    # read dataset into memory
    self._parse_dataset()
def __init__(self,
             data_file=None,
             label_file=None,
             setid_file=None,
             mode='train',
             transform=None,
             download=True):
    """Resolve the data, label and set-id files, then load the annotations.

    Args:
        data_file: Path of the data file, or ``None`` to obtain it through
            ``_check_exists_and_download``.
        label_file: Path of the label file; resolved like ``data_file``.
        setid_file: Path of the set-id file; resolved like ``data_file``.
        mode: ``'train'``, ``'valid'`` or ``'test'`` (case-insensitive);
            its lower-cased form is looked up in ``MODE_FLAG_MAP``.
        transform: Stored unchanged on ``self.transform``.
        download: Must be truthy whenever one of the files is ``None``.

    Raises:
        AssertionError: If ``mode`` is not accepted, or a file is ``None``
            while ``download`` is falsy.
    """
    mode_key = mode.lower()
    assert mode_key in ('train', 'valid', 'test'), \
        "mode should be 'train', 'valid' or 'test', but got {}".format(mode)
    self.flag = MODE_FLAG_MAP[mode_key]

    self.data_file = data_file
    if data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, DATA_URL, DATA_MD5, 'flowers', download)

    self.label_file = label_file
    if label_file is None:
        assert download, "label_file is not set and downloading automatically is disabled"
        self.label_file = _check_exists_and_download(
            label_file, LABEL_URL, LABEL_MD5, 'flowers', download)

    self.setid_file = setid_file
    if setid_file is None:
        assert download, "setid_file is not set and downloading automatically is disabled"
        self.setid_file = _check_exists_and_download(
            setid_file, SETID_URL, SETID_MD5, 'flowers', download)

    self.transform = transform

    # read dataset into memory
    self._load_anno()
def func_test_errors(self):
    """Check the error paths of the folder datasets and the download helper."""
    # Building either folder dataset from self.empty_dir must raise RuntimeError.
    for folder_cls in (ImageFolder, DatasetFolder):
        with self.assertRaises(RuntimeError):
            folder_cls(self.empty_dir)

    # This call to the download helper must raise ValueError.
    with self.assertRaises(ValueError):
        _check_exists_and_download('temp_paddle', None, None, None, False)
def __init__(self,
             data_file=None,
             mode='train',
             src_dict_size=-1,
             trg_dict_size=-1,
             lang='en',
             download=True):
    """Validate the arguments, resolve the data file, then load dicts and data.

    Args:
        data_file: Path of the data file, or ``None`` to obtain it through
            ``_check_exists_and_download``.
        mode: ``'train'``, ``'test'`` or ``'val'`` (case-insensitive).
        src_dict_size: Source dictionary size; must be positive. The
            default ``-1`` is rejected, so callers have to set it.
        trg_dict_size: Target dictionary size; must be positive.
        lang: Source language. ``"en"`` selects ``"de"`` as the target
            language; any other value selects ``"en"`` as the target.
        download: Must be truthy when ``data_file`` is ``None``.

    Raises:
        AssertionError: If ``mode`` or a dictionary size is invalid, or
            ``data_file`` is ``None`` while ``download`` is falsy.
    """
    assert mode.lower() in ['train', 'test', 'val'], \
        "mode should be 'train', 'test' or 'val', but got {}".format(mode)
    self.mode = mode.lower()

    # Check the cheap arguments before touching the data file, so an invalid
    # call (including one relying on the -1 defaults) fails without first
    # triggering a download.
    assert src_dict_size > 0, "dict_size should be set as positive number"
    assert trg_dict_size > 0, "dict_size should be set as positive number"

    self.data_file = data_file
    if self.data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, DATA_URL, DATA_MD5, 'wmt16', download)

    self.lang = lang
    # Stored sizes are capped at the vocabulary totals of each language.
    self.src_dict_size = min(
        src_dict_size, (TOTAL_EN_WORDS if lang == "en" else TOTAL_DE_WORDS))
    self.trg_dict_size = min(
        trg_dict_size, (TOTAL_DE_WORDS if lang == "en" else TOTAL_EN_WORDS))

    # load source and target word dict
    # NOTE(review): the caller-supplied sizes, not the capped ones stored
    # above, are passed to _load_dict - confirm this is intended.
    self.src_dict = self._load_dict(lang, src_dict_size)
    self.trg_dict = self._load_dict("de" if lang == "en" else "en",
                                    trg_dict_size)

    # load data
    self.data = self._load_data()
def __init__(self,
             data_file=None,
             data_type='NGRAM',
             window_size=-1,
             mode='train',
             min_word_freq=50,
             download=True):
    """Validate the arguments, resolve the data file, build the dict and load.

    Args:
        data_file: Path of the data file, or ``None`` to obtain it through
            ``_check_exists_and_download``.
        data_type: ``'NGRAM'`` or ``'SEQ'`` (case-insensitive); stored
            upper-cased on ``self.data_type``.
        window_size: Stored unchanged on ``self.window_size``.
        mode: ``'train'`` or ``'test'`` (case-insensitive).
        min_word_freq: Stored on ``self.min_word_freq`` and passed to
            ``self._build_work_dict``.
        download: Must be truthy when ``data_file`` is ``None``.

    Raises:
        AssertionError: If ``data_type`` or ``mode`` is not accepted, or
            ``data_file`` is ``None`` while ``download`` is falsy.
    """
    type_key = data_type.upper()
    assert type_key in ('NGRAM', 'SEQ'), \
        "data type should be 'NGRAM', 'SEQ', but got {}".format(data_type)
    self.data_type = type_key

    mode_key = mode.lower()
    assert mode_key in ('train', 'test'), \
        "mode should be 'train', 'test', but got {}".format(mode)
    self.mode = mode_key

    self.window_size = window_size
    self.min_word_freq = min_word_freq

    self.data_file = data_file
    if data_file is None:
        assert download, "data_file is not set and downloading automatically disabled"
        self.data_file = _check_exists_and_download(
            data_file, URL, MD5, 'imikolov', download)

    # Build a word dictionary from the corpus
    self.word_idx = self._build_work_dict(min_word_freq)

    # read dataset into memory
    self._load_anno()
def __init__(self,
             data_file=None,
             mode='train',
             transform=None,
             download=True,
             backend=None):
    """Validate mode and backend, resolve the data file and load annotations.

    Args:
        data_file: Path of the data file, or ``None`` to obtain it through
            ``_check_exists_and_download``.
        mode: ``'train'``, ``'valid'`` or ``'test'`` (case-insensitive);
            its lower-cased form is looked up in ``MODE_FLAG_MAP``.
        transform: Stored unchanged on ``self.transform``.
        download: Must be truthy when ``data_file`` is ``None``.
        backend: ``'pil'`` or ``'cv2'``. ``None`` falls back to
            ``paddle.vision.get_image_backend()``.

    Raises:
        AssertionError: If ``mode`` is not accepted, or ``data_file`` is
            ``None`` while ``download`` is falsy.
        ValueError: If the resolved backend is neither 'pil' nor 'cv2'.
    """
    mode_key = mode.lower()
    assert mode_key in ('train', 'valid', 'test'), \
        "mode should be 'train', 'valid' or 'test', but got {}".format(mode)

    chosen_backend = backend
    if chosen_backend is None:
        chosen_backend = paddle.vision.get_image_backend()
    if chosen_backend not in ('pil', 'cv2'):
        raise ValueError(
            "Expected backend are one of ['pil', 'cv2'], but got {}"
            .format(chosen_backend))
    self.backend = chosen_backend

    self.flag = MODE_FLAG_MAP[mode_key]

    self.data_file = data_file
    if data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, VOC_URL, VOC_MD5, CACHE_DIR, download)

    self.transform = transform

    # read dataset into memory
    self._load_anno()

    # Recorded only after the annotations have been loaded.
    self.dtype = paddle.get_default_dtype()
def __init__(self,
             data_file=None,
             mode='train',
             transform=None,
             download=True,
             backend=None):
    """Validate mode and backend, resolve the data file and load the data.

    Args:
        data_file: Path of the data file, or ``None`` to obtain it through
            ``_check_exists_and_download`` with ``self.data_url`` and
            ``self.data_md5``.
        mode: ``'train'`` or ``'test'`` (case-insensitive).
        transform: Stored unchanged on ``self.transform``.
        download: Must be truthy when ``data_file`` is ``None``.
        backend: ``'pil'`` or ``'cv2'``. ``None`` falls back to
            ``paddle.vision.get_image_backend()``.

    Raises:
        AssertionError: If ``mode`` is not accepted, or ``data_file`` is
            ``None`` while ``download`` is falsy.
        ValueError: If the resolved backend is neither 'pil' nor 'cv2'.
    """
    # The message names exactly the values this check accepts.
    assert mode.lower() in ['train', 'test'], \
        "mode should be 'train' or 'test', but got {}".format(mode)
    self.mode = mode.lower()

    if backend is None:
        backend = paddle.vision.get_image_backend()
    if backend not in ['pil', 'cv2']:
        raise ValueError(
            "Expected backend are one of ['pil', 'cv2'], but got {}".
            format(backend))
    self.backend = backend

    # Called before the download because self.data_url / self.data_md5 are
    # read below. NOTE(review): presumably this helper sets them - confirm.
    self._init_url_md5_flag()

    self.data_file = data_file
    if self.data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, self.data_url, self.data_md5, 'cifar', download)

    self.transform = transform

    # read dataset into memory
    self._load_data()

    self.dtype = paddle.get_default_dtype()
def __init__(self,
             data_file=None,
             label_file=None,
             setid_file=None,
             mode='train',
             transform=None,
             download=True,
             backend=None):
    """Resolve the three input files, unpack the archive and read labels/indexes.

    Args:
        data_file: Path of the data archive. A falsy value makes the
            constructor obtain it through ``_check_exists_and_download``.
        label_file: Path of the label file; resolved like ``data_file``.
        setid_file: Path of the set-id file; resolved like ``data_file``.
        mode: ``'train'``, ``'valid'`` or ``'test'`` (case-insensitive);
            its lower-cased form is looked up in ``MODE_FLAG_MAP``.
        transform: Stored unchanged on ``self.transform``.
        download: Must be truthy whenever one of the files is falsy.
        backend: ``'pil'`` or ``'cv2'``. ``None`` falls back to
            ``paddle.vision.get_image_backend()``.

    Raises:
        AssertionError: If ``mode`` is not accepted, or a file is missing
            while ``download`` is falsy.
        ValueError: If the resolved backend is neither 'pil' nor 'cv2'.
    """
    assert mode.lower() in ['train', 'valid', 'test'], \
        "mode should be 'train', 'valid' or 'test', but got {}".format(mode)

    if backend is None:
        backend = paddle.vision.get_image_backend()
    if backend not in ['pil', 'cv2']:
        raise ValueError(
            "Expected backend are one of ['pil', 'cv2'], but got {}"
            .format(backend))
    self.backend = backend

    flag = MODE_FLAG_MAP[mode.lower()]

    if not data_file:
        assert download, "data_file is not set and downloading automatically is disabled"
        data_file = _check_exists_and_download(
            data_file, DATA_URL, DATA_MD5, 'flowers', download)

    if not label_file:
        assert download, "label_file is not set and downloading automatically is disabled"
        label_file = _check_exists_and_download(
            label_file, LABEL_URL, LABEL_MD5, 'flowers', download)

    if not setid_file:
        assert download, "setid_file is not set and downloading automatically is disabled"
        setid_file = _check_exists_and_download(
            setid_file, SETID_URL, SETID_MD5, 'flowers', download)

    self.transform = transform

    # The archive is still opened on every call, so an unreadable file is
    # reported even when the target directory already exists. The context
    # manager guarantees the handle is closed afterwards.
    with tarfile.open(data_file) as data_tar:
        # The extraction directory is the archive path with ".tgz" replaced by "/".
        self.data_path = data_file.replace(".tgz", "/")
        if not os.path.exists(self.data_path):
            os.mkdir(self.data_path)
            # NOTE(review): extractall trusts the member paths inside the
            # archive; confirm the archive's integrity is checked upstream.
            data_tar.extractall(self.data_path)

    scio = try_import('scipy.io')
    self.labels = scio.loadmat(label_file)['labels'][0]
    self.indexes = scio.loadmat(setid_file)[flag][0]
def __init__(self,
             data_file=None,
             word_dict_file=None,
             verb_dict_file=None,
             target_dict_file=None,
             emb_file=None,
             download=True):
    """Resolve the five input files, load the dictionaries and the dataset.

    Every ``*_file`` argument left as ``None`` is obtained through
    ``_check_exists_and_download``; ``download`` must then be truthy.

    Args:
        data_file: Path of the data file.
        word_dict_file: Path of the file loaded into ``self.word_dict``.
        verb_dict_file: Path of the file loaded into ``self.predicate_dict``.
        target_dict_file: Path of the file loaded into ``self.label_dict``.
        emb_file: Path of the embedding file; only stored here.
        download: Whether missing files may be downloaded.

    Raises:
        AssertionError: If a file is ``None`` while ``download`` is falsy.
    """
    self.data_file = data_file
    if data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, DATA_URL, DATA_MD5, 'conll05st', download)

    self.word_dict_file = word_dict_file
    if word_dict_file is None:
        assert download, "word_dict_file is not set and downloading automatically is disabled"
        self.word_dict_file = _check_exists_and_download(
            word_dict_file, WORDDICT_URL, WORDDICT_MD5, 'conll05st',
            download)

    self.verb_dict_file = verb_dict_file
    if verb_dict_file is None:
        assert download, "verb_dict_file is not set and downloading automatically is disabled"
        self.verb_dict_file = _check_exists_and_download(
            verb_dict_file, VERBDICT_URL, VERBDICT_MD5, 'conll05st',
            download)

    self.target_dict_file = target_dict_file
    if target_dict_file is None:
        assert download, "target_dict_file is not set and downloading automatically is disabled"
        self.target_dict_file = _check_exists_and_download(
            target_dict_file, TRGDICT_URL, TRGDICT_MD5, 'conll05st',
            download)

    self.emb_file = emb_file
    if emb_file is None:
        assert download, "emb_file is not set and downloading automatically is disabled"
        self.emb_file = _check_exists_and_download(
            emb_file, EMB_URL, EMB_MD5, 'conll05st', download)

    # The dictionaries are built from the resolved paths, in this order.
    self.word_dict = self._load_dict(self.word_dict_file)
    self.predicate_dict = self._load_dict(self.verb_dict_file)
    self.label_dict = self._load_label_dict(self.target_dict_file)

    # read dataset into memory
    self._load_anno()
def __init__(self,
             image_path=None,
             label_path=None,
             mode='train',
             transform=None,
             download=True,
             backend=None):
    """Validate mode and backend, resolve the image/label files and parse them.

    Args:
        image_path: Path of the image file. When ``None`` the path is
            obtained from ``_check_exists_and_download`` using the class
            attributes ``TRAIN_IMAGE_*`` or ``TEST_IMAGE_*`` selected by
            ``mode``.
        label_path: Path of the label file; resolved like ``image_path``
            with the ``*_LABEL_*`` attributes.
        mode: ``'train'`` or ``'test'``, compared case-insensitively.
        transform: Stored unchanged on ``self.transform``.
        download: Must be truthy whenever a path is left as ``None``.
        backend: ``'pil'`` or ``'cv2'``. ``None`` falls back to
            ``paddle.vision.get_image_backend()``.

    Raises:
        AssertionError: If ``mode`` is not accepted, or a path is ``None``
            while ``download`` is falsy.
        ValueError: If the resolved backend is neither 'pil' nor 'cv2'.
    """
    assert mode.lower() in ['train', 'test'], \
        "mode should be 'train' or 'test', but got {}".format(mode)

    if backend is None:
        backend = paddle.vision.get_image_backend()
    if backend not in ['pil', 'cv2']:
        raise ValueError(
            "Expected backend are one of ['pil', 'cv2'], but got {}".
            format(backend))
    self.backend = backend

    self.mode = mode.lower()

    self.image_path = image_path
    if self.image_path is None:
        assert download, "image_path is not set and downloading automatically is disabled"
        # Select on the normalised mode. The raw argument may be spelled
        # e.g. 'Train'; comparing it directly would pick the test images
        # while the labels below are picked from the train set.
        image_url = self.TRAIN_IMAGE_URL if self.mode == 'train' else self.TEST_IMAGE_URL
        image_md5 = self.TRAIN_IMAGE_MD5 if self.mode == 'train' else self.TEST_IMAGE_MD5
        self.image_path = _check_exists_and_download(
            image_path, image_url, image_md5, self.NAME, download)

    self.label_path = label_path
    if self.label_path is None:
        assert download, "label_path is not set and downloading automatically is disabled"
        label_url = self.TRAIN_LABEL_URL if self.mode == 'train' else self.TEST_LABEL_URL
        label_md5 = self.TRAIN_LABEL_MD5 if self.mode == 'train' else self.TEST_LABEL_MD5
        self.label_path = _check_exists_and_download(
            label_path, label_url, label_md5, self.NAME, download)

    self.transform = transform

    # read dataset into memory
    self._parse_dataset()

    self.dtype = paddle.get_default_dtype()
def __init__(self, data_file=None, mode='train', download=True):
    """Validate the mode, resolve the data file and load the data.

    Args:
        data_file: Path of the data file, or ``None`` to obtain it through
            ``_check_exists_and_download``.
        mode: ``'train'`` or ``'test'`` (case-insensitive); stored
            lower-cased on ``self.mode``.
        download: Must be truthy when ``data_file`` is ``None``.

    Raises:
        AssertionError: If ``mode`` is not accepted, or ``data_file`` is
            ``None`` while ``download`` is falsy.
    """
    mode_key = mode.lower()
    assert mode_key in ('train', 'test'), \
        "mode should be 'train' or 'test', but got {}".format(mode)
    self.mode = mode_key

    self.data_file = data_file
    if data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, URL, MD5, 'uci_housing', download)

    # read dataset into memory
    self._load_data()
def __init__(self, data_file=None, mode='train', cutoff=150, download=True):
    """Validate the mode, resolve the data file, build the dict and load.

    Args:
        data_file: Path of the data file, or ``None`` to obtain it through
            ``_check_exists_and_download``.
        mode: ``'train'`` or ``'test'`` (case-insensitive); stored
            lower-cased on ``self.mode``.
        cutoff: Passed unchanged to ``self._build_work_dict``.
        download: Must be truthy when ``data_file`` is ``None``.

    Raises:
        AssertionError: If ``mode`` is not accepted, or ``data_file`` is
            ``None`` while ``download`` is falsy.
    """
    mode_key = mode.lower()
    assert mode_key in ('train', 'test'), \
        "mode should be 'train', 'test', but got {}".format(mode)
    self.mode = mode_key

    self.data_file = data_file
    if data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, URL, MD5, 'imdb', download)

    # Build a word dictionary from the corpus
    self.word_idx = self._build_work_dict(cutoff)

    # read dataset into memory
    self._load_anno()
def __init__(self, data_file=None, mode='train', transform=None,
             download=True):
    """Validate the mode, resolve the data file and load the annotations.

    Args:
        data_file: Path of the data file, or ``None`` to obtain it through
            ``_check_exists_and_download``.
        mode: ``'train'``, ``'valid'`` or ``'test'`` (case-insensitive);
            its lower-cased form is looked up in ``MODE_FLAG_MAP``.
        transform: Stored unchanged on ``self.transform``.
        download: Must be truthy when ``data_file`` is ``None``.

    Raises:
        AssertionError: If ``mode`` is not accepted, or ``data_file`` is
            ``None`` while ``download`` is falsy.
    """
    mode_key = mode.lower()
    assert mode_key in ('train', 'valid', 'test'), \
        "mode should be 'train', 'valid' or 'test', but got {}".format(mode)
    self.flag = MODE_FLAG_MAP[mode_key]

    self.data_file = data_file
    if data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, VOC_URL, VOC_MD5, CACHE_DIR, download)

    self.transform = transform

    # read dataset into memory
    self._load_anno()
def __init__(self, data_file=None, mode='train', dict_size=-1,
             download=True):
    """Validate the arguments, resolve the data file and load the data.

    Args:
        data_file: Path of the data file, or ``None`` to obtain it through
            ``_check_exists_and_download``.
        mode: ``'train'``, ``'test'`` or ``'gen'`` (case-insensitive);
            stored lower-cased on ``self.mode``.
        dict_size: Dictionary size; must be positive. The default ``-1``
            is rejected, so callers have to set it.
        download: Must be truthy when ``data_file`` is ``None``.

    Raises:
        AssertionError: If ``mode`` or ``dict_size`` is invalid, or
            ``data_file`` is ``None`` while ``download`` is falsy.
    """
    assert mode.lower() in ['train', 'test', 'gen'], \
        "mode should be 'train', 'test' or 'gen', but got {}".format(mode)
    self.mode = mode.lower()

    # Check the cheap argument before touching the data file, so an invalid
    # call (including one relying on the -1 default) fails without first
    # triggering a download.
    assert dict_size > 0, "dict_size should be set as positive number"
    self.dict_size = dict_size

    self.data_file = data_file
    if self.data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, URL_TRAIN, MD5_TRAIN, 'wmt14', download)

    # read dataset into memory
    self._load_data()
def __init__(self, data_file=None, mode='train', transform=None,
             download=True):
    """Validate the mode, resolve the data file and load the data.

    Args:
        data_file: Path of the data file, or ``None`` to obtain it through
            ``_check_exists_and_download`` with ``self.data_url`` and
            ``self.data_md5``.
        mode: ``'train'`` or ``'test'`` (case-insensitive); stored
            lower-cased on ``self.mode``.
        transform: Stored unchanged on ``self.transform``.
        download: Must be truthy when ``data_file`` is ``None``.

    Raises:
        AssertionError: If ``mode`` is not accepted, or ``data_file`` is
            ``None`` while ``download`` is falsy.
    """
    # The message names exactly the values this check accepts.
    assert mode.lower() in ['train', 'test'], \
        "mode should be 'train' or 'test', but got {}".format(mode)
    self.mode = mode.lower()

    # Called before the download because self.data_url / self.data_md5 are
    # read below. NOTE(review): presumably this helper sets them - confirm.
    self._init_url_md5_flag()

    self.data_file = data_file
    if self.data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, self.data_url, self.data_md5, 'cifar', download)

    self.transform = transform

    # read dataset into memory
    self._load_data()
def __init__(self,
             data_file=None,
             mode='train',
             test_ratio=0.1,
             rand_seed=0,
             download=True):
    """Validate the mode, resolve the data file, seed NumPy and load the data.

    Args:
        data_file: Path of the data file, or ``None`` to obtain it through
            ``_check_exists_and_download``.
        mode: ``'train'`` or ``'test'`` (case-insensitive); stored
            lower-cased on ``self.mode``.
        test_ratio: Stored unchanged on ``self.test_ratio``.
        rand_seed: Stored on ``self.rand_seed`` and passed to
            ``np.random.seed``.
        download: Must be truthy when ``data_file`` is ``None``.

    Raises:
        AssertionError: If ``mode`` is not accepted, or ``data_file`` is
            ``None`` while ``download`` is falsy.
    """
    mode_key = mode.lower()
    assert mode_key in ('train', 'test'), \
        "mode should be 'train', 'test', but got {}".format(mode)
    self.mode = mode_key

    self.data_file = data_file
    if data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, URL, MD5, 'sentiment', download)

    self.test_ratio = test_ratio
    self.rand_seed = rand_seed

    # Seeds NumPy's global random state before the loaders below run.
    np.random.seed(rand_seed)

    self._load_meta_info()
    self._load_data()