def test_download_bad_request():
    """download_file raises DownloadError when the mocked GET returns 403."""
    mock_url = "https://mock.uri"
    unused_target = "file/path/does/not/matter"

    # Register a mocked GET endpoint that always answers with HTTP 403.
    responses.add(responses.GET, mock_url, status=403)

    with pytest.raises(DownloadError):
        download_file(mock_url, unused_target, False)
def _download_http(source_uri, dest_path, version):
    """Fetch the dataset from a public HTTP URL and verify its checksum.

    Args:
        source_uri (str): source url where the file should be downloaded
        dest_path (str): destination path of the file
        version: key into ``GroceriesReal.GROCERIES_REAL_DATASET_TABLES``
            used to look up the expected checksum

    Raises:
        DownloadError: if downloading the file failed
        ChecksumError: if the downloaded file's checksum does not match;
            the downloaded file is removed before the error propagates
    """
    logger.info("Downloading the dataset.")
    try:
        download_file(source_uri=source_uri, dest_path=dest_path)
    except DownloadError as download_err:
        logger.info(
            f"The request download from {source_uri} -> {dest_path} can't "
            f"be completed."
        )
        raise download_err

    # The checksum lookup happens only after a successful download.
    table_entry = GroceriesReal.GROCERIES_REAL_DATASET_TABLES[version]
    wanted_checksum = table_entry.checksum

    try:
        validate_checksum(dest_path, wanted_checksum)
    except ChecksumError as checksum_err:
        # Do not leave a corrupt file behind.
        logger.info("Checksum mismatch. Delete the downloaded files.")
        os.remove(dest_path)
        raise checksum_err
def download(data_root, version):
    """Download the dataset zip file and unzip it.

    If a previously downloaded file already exists and its checksum is
    valid, the download is skipped. If its checksum is invalid, the stale
    file is deleted and the dataset is downloaded again.

    Args:
        data_root (str): Path where to download the dataset.
        version (str): version of the dataset, e.g. "v1". Must be a key of
            ``SynDetection2D.SYNTHETIC_DATASET_TABLES``.

    Raises:
        ValueError: if the dataset version is not supported
        ChecksumError: if the checksum of the freshly downloaded file does
            not match (the file is deleted before the error propagates)
        DownloadError: if the download file failed

    Note:
        The dataset is downloaded and unzipped to
        ``os.path.join(data_root, const.SYNTHETIC_SUBFOLDER)``.
    """
    tables = SynDetection2D.SYNTHETIC_DATASET_TABLES
    if version not in tables:
        raise ValueError(
            f"A valid dataset version is required. Available versions are: "
            f"{list(tables)}"
        )

    table_entry = tables[version]
    source_uri = table_entry.source_uri
    expected_checksum = table_entry.checksum
    dataset_file = table_entry.filename

    extract_folder = os.path.join(data_root, const.SYNTHETIC_SUBFOLDER)
    dataset_path = os.path.join(extract_folder, dataset_file)

    if os.path.exists(dataset_path):
        try:
            validate_checksum(dataset_path, expected_checksum)
        except ChecksumError:
            logger.info(
                "The checksum of the previous dataset mismatches. "
                "Delete the previously downloaded dataset."
            )
            os.remove(dataset_path)
        else:
            # Only announce the skip once the existing file is known good.
            logger.info("The dataset file exists. Skip download.")

    # True when there was no file, or the stale file was just removed above.
    if not os.path.exists(dataset_path):
        logger.info(f"Downloading dataset to {extract_folder}.")
        download_file(source_uri, dataset_path)
        try:
            validate_checksum(dataset_path, expected_checksum)
        except ChecksumError:
            logger.info("Checksum mismatch. Delete the downloaded files.")
            os.remove(dataset_path)
            raise

    SynDetection2D.unzip_file(
        filepath=dataset_path, destination=extract_folder
    )
def test_download_file_from_url():
    """download_file writes the mocked response body to the destination."""
    source_uri = "https://mock.uri"
    body = b"some test string here"
    # Register a mocked GET endpoint that serves ``body``.
    responses.add(
        responses.GET, source_uri, body=body, content_type="text/plain"
    )

    with tempfile.TemporaryDirectory() as tmp_dir:
        dest_path = os.path.join(tmp_dir, "test.txt")
        download_file(source_uri, dest_path, False)

        assert os.path.exists(dest_path)
        # Read through a context manager so the file handle is closed
        # before the temporary directory is cleaned up.
        with open(dest_path, "rb") as downloaded:
            assert downloaded.read() == body
def load_from_http(estimator, url):
    """Load an estimator from a checkpoint file downloaded from a URL.

    The checkpoint is downloaded into a temporary directory and passed to
    ``estimator.load``; the temporary directory is discarded afterwards.

    Args:
        estimator (datasetinsights.estimators.Estimator): datasetinsights
            estimator object.
        url: URL of the checkpoint file
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        path = os.path.join(scratch_dir, "estimator_checkpoint")

        logger.debug(f"Downloading estimator from {url} to {path}")
        download_file(source_uri=url, dest_path=path)

        logger.debug(f"Loading estimator from {path}")
        estimator.load(path)
def download(self, source_uri, output, checksum_file=None, **kwargs):
    """Download a dataset from an HTTP or HTTPS url, optionally verifying it.

    Args:
        source_uri (str): This is the downloader-uri that indicates where
            the dataset should be downloaded from.
        output (str): This is the path to the directory where the download
            will store the dataset.
        checksum_file (str): This is path of the txt file that contains
            checksum of the dataset to be downloaded. It can be HTTP or
            HTTPS url or local path. When omitted, no validation is done.
        **kwargs: accepted but not used by this method.

    Raises:
        ChecksumError: if ``checksum_file`` is given and the checksum does
            not match; the downloaded file is deleted first.
    """
    dataset_path = download_file(source_uri, output)

    # Nothing to verify without a checksum file.
    if not checksum_file:
        return

    logger.debug("Reading checksum from checksum file.")
    checksum = get_checksum_from_file(checksum_file)

    logger.debug("Validating checksum!!")
    try:
        validate_checksum(dataset_path, int(checksum))
    except ChecksumError as mismatch:
        logger.info("Checksum mismatch. Deleting the downloaded file.")
        os.remove(dataset_path)
        raise mismatch