Example #1
0
def _download_raw_dataset(metadata):
    """Download the raw dataset described by *metadata* and verify its SHA-256.

    Does nothing when the target file already exists on disk. Raises
    ValueError when the downloaded file's checksum disagrees with the
    checksum recorded in the metadata.
    """
    target = metadata["filename"]
    if os.path.exists(target):
        return
    print(f"Downloading raw dataset from {metadata['url']}...")
    util.download_url(metadata["url"], target)
    # Integrity check: compare the actual digest with the expected one.
    digest = util.compute_sha256(target)
    if digest != metadata["sha256"]:
        raise ValueError("Downloaded data file SHA-256 does not match that listed in metadata document.")
Example #2
0
def _download_raw_dataset(metadata):
    """Fetch the raw dataset file if absent and verify its checksum.

    Skips the download when the file already exists; otherwise downloads
    it and raises ValueError on a SHA-256 mismatch.
    """
    target = metadata['filename']
    if os.path.exists(target):
        return
    print('Downloading raw dataset...')
    util.download_url(metadata['url'], target)
    print('Computing SHA-256...')
    # Guard against a corrupted or tampered download.
    checksum = util.compute_sha256(target)
    if checksum != metadata['sha256']:
        raise ValueError('Downloaded data file SHA-256 does not match that listed in metadata document.')
 def load_or_generate_data(self):
     """Download the processed dataset if needed, then load the splits.

     Reads train/test images and integer labels from the HDF5 file and
     stores them on the instance, then applies subsampling.
     """
     if not PROCESSED_DATA_FILENAME.exists():
         # First run: create the data directory and fetch the archive.
         PROCESSED_DATA_DIRNAME.mkdir(parents=True, exist_ok=True)
         print('Downloading IAM lines...')
         util.download_url(PROCESSED_DATA_URL, PROCESSED_DATA_FILENAME)
     with h5py.File(PROCESSED_DATA_FILENAME, 'r') as f:
         # [:] materializes each dataset into an in-memory array.
         self.x_train, self.y_train_int = f['x_train'][:], f['y_train'][:]
         self.x_test, self.y_test_int = f['x_test'][:], f['y_test'][:]
     self._subsample()
def _download_raw_dataset(metadata: Dict, dl_dirname: Path) -> Path:
    """Download the raw dataset described by *metadata* into *dl_dirname*.

    Skips the download when the file is already present, and verifies the
    SHA-256 of a fresh download against the metadata.

    Args:
        metadata: Mapping with 'filename', 'url', and 'sha256' keys.
        dl_dirname: Directory to download into (created if missing).

    Returns:
        Path to the downloaded (or pre-existing) dataset file.

    Raises:
        ValueError: If the downloaded file's SHA-256 does not match the
            checksum listed in the metadata.
    """
    dl_dirname.mkdir(parents=True, exist_ok=True)
    filename = dl_dirname / metadata["filename"]
    if filename.exists():
        # Bug fix: previously returned None here despite the -> Path
        # annotation, breaking callers that use the returned path.
        return filename
    # Bug fix: the destination placeholder in the message was garbled
    # ("(unknown)") — interpolate the actual target path.
    print(f"Downloading raw dataset from {metadata['url']} to {filename}...")
    util.download_url(metadata["url"], filename)
    print("Computing SHA-256...")
    sha256 = util.compute_sha256(filename)
    if sha256 != metadata["sha256"]:
        raise ValueError("Downloaded data file SHA-256 does not match that listed in metadata document.")
    return filename
Example #5
0
 def _download_pages(self):
     """Download every page image listed in self.data_by_page_id."""
     PAGES_DIRNAME.mkdir(exist_ok=True, parents=True)
     # Pair each page id with its source URL, then unzip into parallel tuples.
     pairs = [(page_id, info["url"]) for page_id, info in self.data_by_page_id.items()]
     ids, urls = zip(*pairs)
     filenames = [PAGES_DIRNAME / page_id for page_id in ids]
     util.download_url(urls, filenames)