def _prepare(self):
    """Download the dataset files once and cache their parsed contents.

    Sets ``self.root`` to ``edu.get_root(self.NAME)`` and
    ``self._data_path`` to ``<root>/data.p``. If ``edu.is_prepared``
    reports that the root is not prepared yet, every file name listed in
    the values of ``self.FILES`` is resolved against ``self.URL``,
    downloaded into the root, parsed with ``read_mnist_file`` and the
    resulting dict is pickled to ``self._data_path``. Finally the root is
    flagged via ``edu.mark_prepared``.
    """
    self.root = edu.get_root(self.NAME)
    self._data_path = Path(self.root) / "data.p"
    if edu.is_prepared(self.root):
        # Nothing to do: the cache was already written by an earlier run.
        return

    self.logger.info(
        "Preparing dataset {} in {}".format(self.NAME, self.root))

    target_dir = Path(self.root)
    # Map each remote file name to its full download URL.
    remote = {
        fname: urllib.parse.urljoin(self.URL, fname)
        for fname in self.FILES.values()
    }
    downloaded = edu.download_urls(remote, target_dir=target_dir)

    # Keys are whatever edu.download_urls returns for each local path.
    parsed = {key: read_mnist_file(path) for key, path in downloaded.items()}

    with open(self._data_path, "wb") as out:
        pickle.dump(parsed, out)

    # Only mark as prepared after the cache file has been written.
    edu.mark_prepared(self.root)
def _prepare(self):
    """Download, unpack and re-pickle the CIFAR-10 python batches.

    Sets ``self.root`` to ``edu.get_root(self.NAME)`` and
    ``self._data_path`` to ``<root>/data.p``. If ``edu.is_prepared``
    reports that the root is not prepared yet, the files named in the
    values of ``self.FILES`` are downloaded relative to ``self.URL``, the
    ``cifar-10-python.tar.gz`` archive is unpacked, and the five training
    batches, the test batch and ``batches.meta`` are read from
    ``<root>/cifar-10-batches-py``.

    The pickled result is a dict with three entries:

    * ``"train"`` / ``"test"``: dicts with ``images`` (reshaped to
      ``(N, 32, 32, 3)``), ``filenames`` and ``labels`` as numpy arrays.
    * ``"meta"``: ``label_names`` (decoded to ``str``), ``num_vis`` and
      ``num_cases_per_batch``.

    Finally the root is flagged via ``edu.mark_prepared``.
    """
    self.root = edu.get_root(self.NAME)
    self._data_path = Path(self.root) / "data.p"
    if edu.is_prepared(self.root):
        # Nothing to do: the cache was already written by an earlier run.
        return

    self.logger.info(
        "Preparing dataset {} in {}".format(self.NAME, self.root))

    target_dir = Path(self.root)
    # Map each remote file name to its full download URL.
    remote = {
        fname: urllib.parse.urljoin(self.URL, fname)
        for fname in self.FILES.values()
    }
    downloaded = edu.download_urls(remote, target_dir=target_dir)
    edu.unpack(downloaded["cifar-10-python.tar.gz"])
    batch_dir = os.path.join(self.root, "cifar-10-batches-py")

    def load_pickle(name):
        # Loaded with encoding="bytes", so dict keys and strings inside
        # the result are bytes objects.
        # NOTE(review): pickle.load can execute arbitrary code; the
        # download source must be trusted.
        with open(os.path.join(batch_dir, name), "rb") as handle:
            return pickle.load(handle, encoding="bytes")

    def decode_all(raw_names):
        return [raw.decode() for raw in raw_names]

    def to_channel_last(flat):
        # Reshape to (N, 3, 32, 32), then move the channel axis last to
        # get (N, 32, 32, 3); presumably RGB uint8 -- depends on the
        # archive contents.
        planar = np.reshape(flat, [-1, 3, 32, 32])
        return np.transpose(planar, [0, 2, 3, 1])

    # Accumulate the five training batches in order.
    train_labels = []
    train_filenames = []
    train_chunks = []
    for index in range(1, 6):
        batch = load_pickle("data_batch_{}".format(index))
        train_labels.extend(batch[b"labels"])
        train_filenames.extend(decode_all(batch[b"filenames"]))
        train_chunks.append(batch[b"data"])

    test_batch = load_pickle("test_batch")
    test_labels = test_batch[b"labels"]
    test_filenames = decode_all(test_batch[b"filenames"])
    test_flat = test_batch[b"data"]

    raw_meta = load_pickle("batches.meta")
    meta = {
        "label_names": decode_all(raw_meta[b"label_names"]),
        "num_vis": raw_meta[b"num_vis"],
        "num_cases_per_batch": raw_meta[b"num_cases_per_batch"],
    }

    dataset = {
        "train": {
            "images": to_channel_last(np.concatenate(train_chunks, axis=0)),
            "filenames": np.array(train_filenames),
            "labels": np.array(train_labels),
        },
        "test": {
            "images": to_channel_last(test_flat),
            "filenames": np.array(test_filenames),
            "labels": np.array(test_labels),
        },
        "meta": meta,
    }

    with open(self._data_path, "wb") as out:
        pickle.dump(dataset, out)

    # Only mark as prepared after the cache file has been written.
    edu.mark_prepared(self.root)