def download_file_from_s3(bucket_name: str, key: str, local_path: str) -> None:
    """
    Downloads file from S3 anonymously

    Nothing is downloaded when `local_path` already exists as a file.

    :param bucket_name: S3 Bucket name
    :param key: S3 File key name
    :param local_path: Local file path to download as
    :raises KeyError: If the S3 client raises a ClientError for the object
    """
    verify_ssl = get_verify_ssl()
    if os.path.isfile(local_path):
        logger.info(f"Reusing cached file {local_path}...")
        return

    # UNSIGNED signature version: the client does not sign its requests.
    client = boto3.client(
        "s3", config=Config(signature_version=UNSIGNED), verify=verify_ssl
    )
    try:
        logger.info("Downloading S3 data file...")
        # The object size is looked up first and handed to ProgressPercentage.
        total = client.head_object(Bucket=bucket_name, Key=key)["ContentLength"]
        with ProgressPercentage(client, bucket_name, key, total) as progress:
            client.download_file(bucket_name, key, local_path, Callback=progress)
    except ClientError as err:
        # Keep the KeyError type callers expect, but preserve the S3 error
        # as the explicit cause so the real failure reason stays visible.
        raise KeyError(
            f"File {key} not available in {bucket_name} bucket."
        ) from err
def download_private_file_from_s3(bucket_name: str, key: str, local_path: str) -> None:
    """
    Downloads file from S3 using credentials stored in ENV variables.

    The access key id and secret are read from `ARMORY_PRIVATE_S3_ID` and
    `ARMORY_PRIVATE_S3_KEY`. Nothing is downloaded when `local_path` already
    exists as a file.

    :param bucket_name: S3 Bucket name
    :param key: S3 File keyname
    :param local_path: Local file path to download as
    :raises KeyError: If the S3 client raises a ClientError for the object
    """
    verify_ssl = get_verify_ssl()
    if os.path.isfile(local_path):
        logger.info("Reusing cached S3 data file...")
        return

    client = boto3.client(
        "s3",
        aws_access_key_id=os.getenv("ARMORY_PRIVATE_S3_ID"),
        aws_secret_access_key=os.getenv("ARMORY_PRIVATE_S3_KEY"),
        verify=verify_ssl,
    )
    try:
        logger.info("Downloading S3 data file...")
        # The object size is looked up first and handed to ProgressPercentage.
        total = client.head_object(Bucket=bucket_name, Key=key)["ContentLength"]
        with ProgressPercentage(client, bucket_name, key, total) as progress:
            client.download_file(bucket_name, key, local_path, Callback=progress)
    except ClientError as err:
        # Keep the KeyError type callers expect, but preserve the S3 error
        # as the explicit cause so the real failure reason stays visible.
        raise KeyError(
            f"File {key} not available in {bucket_name} bucket."
        ) from err
def download_requests(url: str, dirpath: str, filename: str) -> None:
    """
    Streams the content at `url` into a local file while showing a progress bar.

    :param url: URL to download
    :param dirpath: Directory the file is written into
    :param filename: Name of the file created inside `dirpath`
    :raises requests.HTTPError: If the server responds with an error status
    """
    verify_ssl = get_verify_ssl()
    filepath = os.path.join(dirpath, filename)
    chunk_size = 4096

    # The response is used as a context manager so the streamed connection is
    # released even if writing the file fails part-way through.
    with requests.get(url, stream=True, verify=verify_ssl) as r:
        # Fail before creating the file instead of saving an error page as
        # if it were the requested download.
        r.raise_for_status()

        # Content-Length may be absent; tqdm accepts total=None and then
        # shows a running count without a percentage.
        content_length = r.headers.get("Content-Length")
        total = int(content_length) if content_length is not None else None

        with open(filepath, "wb") as f, tqdm(
            unit="B", total=total, unit_scale=True
        ) as progress_bar:
            for chunk in r.iter_content(chunk_size=chunk_size):
                if chunk:  # filter keep-alive chunks
                    progress_bar.update(len(chunk))
                    f.write(chunk)
def download_and_extract_repo(external_repo_name: str, external_repo_dir: str = None) -> None:
    """
    Downloads and extracts an external repository for use within ARMORY.
    The external repositories project root will be added to the sys path.

    Private repositories require an `ARMORY_GITHUB_TOKEN` environment variable.

    :param external_repo_name: String name of "organization/repo-name" or
        "organization/repo-name@branch". Without a branch, "master" is used.
    :param external_repo_dir: Directory to download and extract into. When
        None, `paths.runtime_paths().external_repo_dir` is used.
    :raises ConnectionError: If the GitHub API does not answer with status 200
    """
    verify_ssl = get_verify_ssl()

    if external_repo_dir is None:
        external_repo_dir = paths.runtime_paths().external_repo_dir
    os.makedirs(external_repo_dir, exist_ok=True)

    if "@" in external_repo_name:
        # Split on the first "@" only, so a branch name that itself contains
        # "@" does not break the unpacking.
        org_repo_name, _, branch = external_repo_name.partition("@")
    else:
        org_repo_name = external_repo_name
        branch = "master"
    repo_name = org_repo_name.split("/")[-1]

    # An unset or empty token means an unauthenticated request.
    headers = {}
    github_token = os.getenv("ARMORY_GITHUB_TOKEN")
    if github_token:
        headers = {"Authorization": f"token {github_token}"}

    tar_filename = os.path.join(external_repo_dir, repo_name + ".tar.gz")

    # Context manager releases the streamed connection on every exit path.
    with requests.get(
        f"https://api.github.com/repos/{org_repo_name}/tarball/{branch}",
        headers=headers,
        stream=True,
        verify=verify_ssl,
    ) as response:
        if response.status_code != 200:
            raise ConnectionError(
                "Unable to download repository. If it's private make sure "
                "`ARMORY_GITHUB_TOKEN` environment variable is set\n"
                f"status_code is {response.status_code}\n"
                f"full response is {response.text}")

        logger.info(f"Downloading external repo: {external_repo_name}")
        with open(tar_filename, "wb") as f:
            # Copy the raw stream in chunks rather than buffering the whole
            # tarball in memory.
            shutil.copyfileobj(response.raw, f)

    # Close the archive handle once extraction is done.
    with tarfile.open(tar_filename, "r:gz") as tar:
        # The first archive entry is taken as the extracted top-level directory.
        dl_directory_name = tar.getnames()[0]
        # NOTE(review): members are extracted without path validation or an
        # extraction filter; consider one if archives may be untrusted.
        tar.extractall(path=external_repo_dir)

    # Always overwrite existing repositories to keep them at HEAD
    final_dir_name = os.path.join(external_repo_dir, repo_name)
    if os.path.isdir(final_dir_name):
        shutil.rmtree(final_dir_name)
    os.rename(
        os.path.join(external_repo_dir, dl_directory_name),
        final_dir_name,
    )
    add_path(final_dir_name, include_parent=True)