def fetch_inventories():
    """Fetch all inventories for Airflow documentation packages and store in cache."""
    os.makedirs(os.path.dirname(CACHE_DIR), exist_ok=True)
    to_download = []

    for pkg_name in get_available_providers_packages():
        to_download.append(
            (
                S3_DOC_URL_VERSIONED.format(package_name=pkg_name),
                f'{CACHE_DIR}/{pkg_name}/objects.inv',
            )
        )
    to_download.append(
        (
            S3_DOC_URL_VERSIONED.format(package_name='apache-airflow'),
            f'{CACHE_DIR}/apache-airflow/objects.inv',
        )
    )
    to_download.append(
        (
            S3_DOC_URL_NON_VERSIONED.format(package_name='apache-airflow-providers'),
            f'{CACHE_DIR}/apache-airflow-providers/objects.inv',
        )
    )
    # Only fetch an inventory when it is missing from the cache or stale.
    to_download = [
        (url, path)
        for url, path in to_download
        if not os.path.isfile(path) or _is_outdated(path)
    ]
    print(f"To download {len(to_download)} inventorie(s)")
    if not to_download:
        return

    with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool:
        for url, path in to_download:
            pool.submit(_fetch_file, session=session, url=url, path=path)
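# A minimal sketch (assumption; the helper is not shown in this section) of the
# `_is_outdated` function used in the filter above. In this version the caller
# checks `os.path.isfile(path)` first, so the helper only needs to compare the
# cached file's mtime against a freshness window; the 12-hour threshold is
# illustrative, not taken from the source.
import datetime
import os


def _is_outdated(path: str) -> bool:
    # Treat the cached inventory as stale once it is older than the freshness window.
    delta = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(path))
    return delta > datetime.timedelta(hours=12)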
def fetch_inventories():
    """Fetch all inventories for Airflow documentation packages and store in cache."""
    os.makedirs(os.path.dirname(CACHE_DIR), exist_ok=True)
    to_download: List[Tuple[str, str, str]] = []

    for pkg_name in get_available_providers_packages():
        to_download.append(
            (
                pkg_name,
                S3_DOC_URL_VERSIONED.format(package_name=pkg_name),
                f'{CACHE_DIR}/{pkg_name}/objects.inv',
            )
        )
    for pkg_name in ['apache-airflow', 'helm-chart']:
        to_download.append(
            (
                pkg_name,
                S3_DOC_URL_VERSIONED.format(package_name=pkg_name),
                f'{CACHE_DIR}/{pkg_name}/objects.inv',
            )
        )
    for pkg_name in ['apache-airflow-providers', 'docker-stack']:
        to_download.append(
            (
                pkg_name,
                S3_DOC_URL_NON_VERSIONED.format(package_name=pkg_name),
                f'{CACHE_DIR}/{pkg_name}/objects.inv',
            )
        )
    to_download.extend(
        (
            pkg_name,
            f"{doc_url}/objects.inv",
            f'{CACHE_DIR}/{pkg_name}/objects.inv',
        )
        for pkg_name, doc_url in THIRD_PARTY_INDEXES.items()
    )

    to_download = [
        (pkg_name, url, path) for pkg_name, url, path in to_download if _is_outdated(path)
    ]
    if not to_download:
        print("Nothing to do")
        return []

    print(f"To download {len(to_download)} inventorie(s)")
    with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool:
        download_results: Iterator[Tuple[str, bool]] = pool.map(
            _fetch_file,
            repeat(session, len(to_download)),
            (pkg_name for pkg_name, _, _ in to_download),
            (url for _, url, _ in to_download),
            (path for _, _, path in to_download),
        )
        failed, success = partition(lambda d: d[1], download_results)
        failed, success = list(failed), list(success)

    print(f"Result: {len(success)} success, {len(failed)} failed")
    if failed:
        print("Failed packages:")
        for pkg_no, (pkg_name, _) in enumerate(failed, start=1):
            print(f"{pkg_no}. {pkg_name}")

    return [pkg_name for pkg_name, status in failed]
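# A minimal sketch (assumption) of the `_fetch_file` callable implied by the
# `pool.map(...)` call above: it receives the shared requests session plus one
# package name, inventory URL and target path, and returns `(pkg_name, success)`
# so the results can be partitioned on the boolean. The error handling and
# streaming copy below are illustrative, not the project's actual implementation.
import os
import shutil
from typing import Tuple

import requests


def _fetch_file(session: requests.Session, pkg_name: str, url: str, path: str) -> Tuple[str, bool]:
    # Stream the objects.inv file into the cache directory for this package.
    response = session.get(url, allow_redirects=True, stream=True)
    if not response.ok:
        print(f"Failed to fetch inventory: {url}")
        return pkg_name, False
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'wb') as output_file:
        response.raw.decode_content = True
        shutil.copyfileobj(response.raw, output_file)
    print(f"Fetched inventory: {url}")
    return pkg_name, True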
def fetch_inventories():
    """Fetch all inventories for Airflow documentation packages and store in cache."""
    os.makedirs(os.path.dirname(CACHE_DIR), exist_ok=True)
    to_download = []

    for pkg_name in get_available_providers_packages():
        to_download.append(
            (
                S3_DOC_URL_VERSIONED.format(package_name=pkg_name),
                f'{CACHE_DIR}/{pkg_name}/objects.inv',
            )
        )
    to_download.append(
        (
            S3_DOC_URL_VERSIONED.format(package_name='apache-airflow'),
            f'{CACHE_DIR}/apache-airflow/objects.inv',
        )
    )
    for pkg_name in ['apache-airflow-providers', 'docker-stack']:
        to_download.append(
            (
                S3_DOC_URL_NON_VERSIONED.format(package_name=pkg_name),
                f'{CACHE_DIR}/{pkg_name}/objects.inv',
            )
        )
    to_download.extend(
        (
            f"{doc_url}/objects.inv",
            f'{CACHE_DIR}/{pkg_name}/objects.inv',
        )
        for pkg_name, doc_url in THIRD_PARTY_INDEXES.items()
    )

    to_download = [(url, path) for url, path in to_download if _is_outdated(path)]
    if not to_download:
        print("Nothing to do")
        return

    print(f"To download {len(to_download)} inventorie(s)")
    with requests.Session() as session, concurrent.futures.ThreadPoolExecutor(DEFAULT_POOLSIZE) as pool:
        for url, path in to_download:
            pool.submit(_fetch_file, session=session, url=url, path=path)
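# Note: unlike the first version, the two later versions filter with a bare
# `_is_outdated(path)` call and no `os.path.isfile` guard, so they presumably rely
# on the helper treating a missing cache file as outdated. A minimal sketch of such
# a variant (assumption; the 12-hour window is illustrative):
import datetime
import os


def _is_outdated(path: str) -> bool:
    # A missing cache file always needs to be (re-)downloaded.
    if not os.path.exists(path):
        return True
    delta = datetime.datetime.now() - datetime.datetime.fromtimestamp(os.path.getmtime(path))
    return delta > datetime.timedelta(hours=12)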