def __init__(self, app, name, machine_name_prefix, is_pool):
    """Initialise the bookkeeping for one named collection of instances.

    The database, compute client and zone monitor are looked up in
    ``app``. The stored machine name prefix is the given prefix followed
    by this collection's name and a trailing ``-``. Every index starts
    empty, every counter starts at zero, and the size limits start as
    ``None``.
    """
    # Shared services.
    self.app = app
    self.db: Database = app['db']
    self.compute_client: aiogoogle.GoogleComputeClient = app['compute_client']
    self.zone_monitor: ZoneMonitor = app['zone_monitor']

    # Identity of this collection.
    self.name = name
    self.machine_name_prefix = f'{machine_name_prefix}{self.name}-'
    self.is_pool = is_pool

    # Instance lookup table, plus a view kept sorted by ``last_updated``.
    self.name_instance: Dict[str, Instance] = {}
    self.live_free_cores_mcpu_by_zone: Dict[str, int] = collections.defaultdict(int)
    self.instances_by_last_updated = sortedcontainers.SortedSet(
        key=lambda inst: inst.last_updated
    )

    # One counter per instance state, all starting at zero.
    self.n_instances_by_state = dict.fromkeys(
        ('pending', 'active', 'inactive', 'deleted'), 0
    )

    # "Live" means pending and active.
    self.live_free_cores_mcpu = 0
    self.live_total_cores_mcpu = 0

    self.boot_disk_size_gb = None
    self.max_instances = None
    self.max_live_instances = None

    self.task_manager = aiotools.BackgroundTaskManager()
def __init__(self, k8s_client: kube.client.CoreV1Api):
    """Create an empty cache bound to ``k8s_client``.

    ``entries`` holds the cached values, ``locks`` hands out one
    ``asyncio.Lock`` per key on first access, and ``keys`` is a sorted
    set ordered by the ``expire_time`` of each key's entry.
    """
    self.k8s_client: kube.client.CoreV1Api = k8s_client
    self.task_manager = aiotools.BackgroundTaskManager()

    self.entries: Dict[str, CacheEntry[List[AddressAndPort]]] = {}
    self.locks: DefaultDict[str, asyncio.Lock] = defaultdict(asyncio.Lock)

    # The sort key reads ``self.entries`` lazily, so a key's entry must
    # exist by the time the key is placed in this set.
    self.keys = sortedcontainers.SortedSet(
        key=lambda cache_key: self.entries[cache_key].expire_time
    )
def __init__(self, handler, event=None, bump_secs=60.0, min_delay_secs=0.1):
    """Record the handler, its trigger event and the timing settings.

    Args:
        handler: Stored as ``self.handler``.
        event: Event to use; a fresh ``asyncio.Event`` is created when
            this is ``None``.
        bump_secs: Stored as ``self.bump_secs``.
        min_delay_secs: Stored as ``self.min_delay_secs``.
    """
    self.handler = handler
    self.event = asyncio.Event() if event is None else event
    self.bump_secs = bump_secs
    self.min_delay_secs = min_delay_secs
    self.task_manager = aiotools.BackgroundTaskManager()
def __init__(self, app, pool):
    """Bind this object to ``pool`` and to the services stored in ``app``.

    The ``scheduler_state_changed`` attribute is shared with ``pool``
    (same object). A new ``ExceededSharesCounter`` and a new background
    task manager are created for this instance.
    """
    self.app = app
    self.pool = pool

    # Reuse the pool's notification object rather than creating one.
    self.scheduler_state_changed = pool.scheduler_state_changed

    self.db: Database = app['db']
    self.async_worker_pool: AsyncWorkerPool = app['async_worker_pool']

    self.exceeded_shares_counter = ExceededSharesCounter()
    self.task_manager = aiotools.BackgroundTaskManager()
def __init__(self, app):
    """Keep a reference to ``app`` and its compute client.

    Starts with a fresh ``ZoneSuccessRate``, with ``region_info`` unset
    (``None``), and with its own background task manager.
    """
    self.app = app
    self.compute_client: aiogoogle.ComputeClient = app['compute_client']

    self.region_info = None
    self.zone_success_rate = ZoneSuccessRate()

    self.task_manager = aiotools.BackgroundTaskManager()
def __init__(self, app, machine_name_prefix):
    """Collect the ``app`` services this object uses and store the prefix.

    Reads ``db``, ``zone_monitor``, ``compute_client``, ``logging_client``
    and ``inst_coll_manager`` from ``app``; all of them must already be
    present or a ``KeyError`` is raised.
    """
    self.app = app
    self.machine_name_prefix = machine_name_prefix

    self.db: Database = app['db']
    self.zone_monitor: ZoneMonitor = app['zone_monitor']
    self.inst_coll_manager: InstanceCollectionManager = app['inst_coll_manager']

    # Google API clients.
    self.compute_client: aiogoogle.ComputeClient = app['compute_client']
    self.logging_client: aiogoogle.LoggingClient = app['logging_client']

    self.task_manager = aiotools.BackgroundTaskManager()
async def on_startup(app):
    """Populate ``app`` with its clients and launch the GitHub polling task.

    Stores a ``ReadGoogleStorage`` reader, an aiohttp client session, a
    GitHub API client, a batch client and a background task manager under
    their ``app`` keys, then schedules ``github_polling_loop`` wrapped in
    ``retry_long_running``.
    """
    gs_reader = ReadGoogleStorage(
        service_account_key_file='/benchmark-gsa-key/key.json'
    )
    app['gs_reader'] = gs_reader

    # The GitHub client reuses the session that is stored on the app.
    session = aiohttp.ClientSession()
    app['gh_client_session'] = session
    app['github_client'] = gidgethub.aiohttp.GitHubAPI(
        session, 'hail-is/hail', oauth_token=oauth_token
    )

    app['batch_client'] = bc.BatchClient(billing_project='benchmark')

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    polling = retry_long_running('github_polling_loop', github_polling_loop, app)
    task_manager.ensure_future(polling)
def __init__(self, app):
    """Pick up the cancellation events and shared services from ``app``.

    The three ``cancel_*_state_changed`` events, the database, the async
    worker pool, the compute client and the instance collection manager
    must already be stored in ``app``.
    """
    self.app = app

    # Events for the ready, creating and running cancellation states.
    self.cancel_ready_state_changed: asyncio.Event = app['cancel_ready_state_changed']
    self.cancel_creating_state_changed: asyncio.Event = app['cancel_creating_state_changed']
    self.cancel_running_state_changed: asyncio.Event = app['cancel_running_state_changed']

    # Shared services.
    self.db: Database = app['db']
    self.async_worker_pool: AsyncWorkerPool = app['async_worker_pool']
    self.compute_client: aiogoogle.GoogleComputeClient = app['compute_client']
    self.inst_coll_manager: InstanceCollectionManager = app['inst_coll_manager']

    self.task_manager = aiotools.BackgroundTaskManager()
async def on_startup(app):
    """Create the clients and database pool, then start ``update_loop``.

    The aiohttp session is created with a total timeout of 5 and is
    shared with the GitHub API client. ``update_loop(app)`` is handed to
    a new background task manager stored as ``app['task_manager']``.
    """
    session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=5))
    app['gh_client_session'] = session
    app['github_client'] = gh_aiohttp.GitHubAPI(
        session, 'ci', oauth_token=oauth_token
    )

    app['batch_client'] = BatchClient('ci')
    app['dbpool'] = await create_database_pool()

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    task_manager.ensure_future(update_loop(app))
async def on_startup(app):
    """Create the clients and database, then start ``update_loop``.

    Stores the HTTP client session, GitHub API client, batch client and
    an initialised ``Database`` in ``app``, then schedules
    ``update_loop(app)`` on a new background task manager.
    """
    session = httpx.client_session()
    app['client_session'] = session
    app['github_client'] = gh_aiohttp.GitHubAPI(
        session, 'ci', oauth_token=oauth_token
    )

    app['batch_client'] = await BatchClient.create('ci')

    # The database is stored on the app first and initialised afterwards.
    database = Database()
    app['db'] = database
    await database.async_init()

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    task_manager.ensure_future(update_loop(app))
async def on_startup(app):
    """Populate ``app`` with its clients and launch the GitHub polling task.

    Google credentials are loaded from ``/benchmark-gsa-key/key.json`` and
    used for the storage filesystem. The HTTP session is shared with the
    GitHub API client. ``github_polling_loop`` is scheduled through
    ``retry_long_running`` on a new background task manager.
    """
    gcs_credentials = aiogoogle.GoogleCredentials.from_file(
        '/benchmark-gsa-key/key.json'
    )
    app['fs'] = aiogoogle.GoogleStorageAsyncFS(credentials=gcs_credentials)

    session = httpx.client_session()
    app['client_session'] = session
    app['github_client'] = gidgethub.aiohttp.GitHubAPI(
        session, 'hail-is/hail', oauth_token=oauth_token
    )

    app['batch_client'] = await bc.BatchClient.create(billing_project='benchmark')

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    polling = retry_long_running('github_polling_loop', github_polling_loop, app)
    task_manager.ensure_future(polling)
async def on_startup(app):
    """Load the GitHub token, build the clients, and start polling.

    The token path comes from the ``GITHUB_TOKEN_PATH`` environment
    variable, falling back to
    ``/secrets/scorecard-github-access-token.txt``. ``update_data`` is
    awaited once before ``poll`` is scheduled on a new background task
    manager.
    """
    token_path = os.environ.get(
        'GITHUB_TOKEN_PATH', '/secrets/scorecard-github-access-token.txt'
    )
    with open(token_path, 'r') as token_fh:
        token = token_fh.read().strip()

    # The session raises on error statuses and has a total timeout of 5.
    session = aiohttp.ClientSession(
        raise_for_status=True, timeout=aiohttp.ClientTimeout(total=5)
    )
    app['gh_session'] = session

    gh_client = gidgethub.aiohttp.GitHubAPI(session, 'scorecard', oauth_token=token)
    app['gh_client'] = gh_client

    asana_client = AsanaClient()
    app['asana_client'] = asana_client

    # One update up front, before the polling task exists.
    await update_data(gh_client, asana_client)

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager
    task_manager.ensure_future(poll(gh_client, asana_client))
async def on_startup(app):
    """Set up the database and Google clients, then start the monitors.

    Stores the database, HTTP session, BigQuery client, compute client,
    billing-query event and the list of zone names in ``app``. Region
    descriptions are fetched one at a time for each name in
    ``BATCH_GCP_REGIONS``; the zone list is the basename of every zone
    URL they contain. Four background tasks are then scheduled.
    """
    database = Database()
    await database.async_init()
    app['db'] = database

    app['client_session'] = httpx.client_session()

    gcp_credentials = aiogoogle.GoogleCredentials.from_file(
        '/billing-monitoring-gsa-key/key.json'
    )

    bigquery_client = aiogoogle.GoogleBigQueryClient(
        'broad-ctsa', credentials=gcp_credentials
    )
    app['bigquery_client'] = bigquery_client

    compute_client = aiogoogle.GoogleComputeClient(
        PROJECT, credentials=gcp_credentials
    )
    app['compute_client'] = compute_client

    query_billing_event = asyncio.Event()
    app['query_billing_event'] = query_billing_event

    # Fetch each region in turn, in BATCH_GCP_REGIONS order.
    region_info = {}
    for region_name in BATCH_GCP_REGIONS:
        region_info[region_name] = await compute_client.get(f'/regions/{region_name}')

    zones = []
    for region in region_info.values():
        zones.extend(url_basename(zone_url) for zone_url in region['zones'])
    app['zones'] = zones

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    task_manager.ensure_future(
        retry_long_running('polling_loop', polling_loop, app)
    )
    task_manager.ensure_future(
        retry_long_running(
            'query_billing_loop',
            run_if_changed_idempotent,
            query_billing_event,
            query_billing_body,
            app,
        )
    )
    task_manager.ensure_future(periodically_call(60, monitor_disks, app))
    task_manager.ensure_future(periodically_call(60, monitor_instances, app))
async def on_startup(app):
    """Set up the database and BigQuery client, then start the loops.

    Stores the database, BigQuery client and billing-query event in
    ``app``, then schedules ``polling_loop`` and the billing query loop
    (both wrapped in ``retry_long_running``) on a new background task
    manager.
    """
    database = Database()
    await database.async_init()
    app['db'] = database

    gcp_credentials = aiogoogle.Credentials.from_file(
        '/billing-monitoring-gsa-key/key.json'
    )
    bigquery_client = BigQueryClient('broad-ctsa', credentials=gcp_credentials)
    app['bigquery_client'] = bigquery_client

    query_billing_event = asyncio.Event()
    app['query_billing_event'] = query_billing_event

    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    task_manager.ensure_future(
        retry_long_running('polling_loop', polling_loop, app)
    )
    task_manager.ensure_future(
        retry_long_running(
            'query_billing_loop',
            run_if_changed_idempotent,
            query_billing_event,
            query_billing_body,
            app,
        )
    )
def __init__(self, parallelism):
    """Create the semaphore and the background task manager.

    Args:
        parallelism: Initial value of the internal ``asyncio.Semaphore``.
    """
    self.task_manager = aiotools.BackgroundTaskManager()
    self._semaphore = asyncio.Semaphore(parallelism)
async def on_startup(app):
    """Build every shared service, store it in ``app``, and start the periodic tasks.

    The order matters: later constructors receive ``app`` and may read
    keys stored by earlier steps (for example ``Canceller(app)`` is built
    only after ``inst_coll_manager`` is stored).
    """
    app['task_manager'] = aiotools.BackgroundTaskManager()

    # Thread pool stored as app['blocking_pool'] and later passed to LogStore.
    pool = concurrent.futures.ThreadPoolExecutor()
    app['blocking_pool'] = pool

    kube.config.load_incluster_config()
    k8s_client = kube.client.CoreV1Api()
    k8s_cache = K8sCache(k8s_client, refresh_time=5)
    app['k8s_cache'] = k8s_cache

    db = Database()
    await db.async_init(maxsize=50)
    app['db'] = db

    # Instance identity and internal token come from the `globals` table.
    row = await db.select_and_fetchone(''' SELECT instance_id, internal_token FROM globals; ''')

    instance_id = row['instance_id']
    log.info(f'instance_id {instance_id}')
    app['instance_id'] = instance_id

    app['internal_token'] = row['internal_token']
    app['batch_headers'] = {'Authorization': f'Bearer {row["internal_token"]}'}

    # Materialise the async result set into a plain list of resource values.
    resources = db.select_and_fetchall('SELECT resource FROM resources;')
    app['resources'] = [record['resource'] async for record in resources]

    aiogoogle_credentials = aiogoogle.Credentials.from_file(
        '/gsa-key/key.json')
    compute_client = aiogoogle.ComputeClient(PROJECT,
                                             credentials=aiogoogle_credentials)
    app['compute_client'] = compute_client

    logging_client = aiogoogle.LoggingClient(
        credentials=aiogoogle_credentials,
        # The project-wide logging quota is 60 requests/m. The event
        # loop sleeps 15s per iteration, so the max rate is 4
        # iterations/m. Note, the event loop could make multiple
        # logging requests per iteration, so these numbers are not
        # quite comparable. I didn't want to consume the entire quota
        # since there will be other users of the logging API (us at
        # the web console, test deployments, etc.)
        rate_limit=RateLimit(10, 60),
    )
    app['logging_client'] = logging_client

    scheduler_state_changed = Notice()
    app['scheduler_state_changed'] = scheduler_state_changed

    # One event per cancellation state (ready, creating, running).
    cancel_ready_state_changed = asyncio.Event()
    app['cancel_ready_state_changed'] = cancel_ready_state_changed

    cancel_creating_state_changed = asyncio.Event()
    app['cancel_creating_state_changed'] = cancel_creating_state_changed

    cancel_running_state_changed = asyncio.Event()
    app['cancel_running_state_changed'] = cancel_running_state_changed

    async_worker_pool = AsyncWorkerPool(100, queue_size=100)
    app['async_worker_pool'] = async_worker_pool

    # The same key file as above, loaded a second time through google.oauth2
    # for LogStore.
    credentials = google.oauth2.service_account.Credentials.from_service_account_file(
        '/gsa-key/key.json')
    log_store = LogStore(BATCH_BUCKET_NAME, instance_id, pool, credentials=credentials)
    app['log_store'] = log_store

    # Each component below is stored in app before its async_init is awaited.
    zone_monitor = ZoneMonitor(app)
    app['zone_monitor'] = zone_monitor
    await zone_monitor.async_init()

    # The configs object is not stored in app; it is only passed to the
    # manager's async_init.
    inst_coll_configs = InstanceCollectionConfigs(app)
    await inst_coll_configs.async_init()

    inst_coll_manager = InstanceCollectionManager(app, MACHINE_NAME_PREFIX)
    app['inst_coll_manager'] = inst_coll_manager
    await inst_coll_manager.async_init(inst_coll_configs)

    canceller = Canceller(app)
    app['canceller'] = canceller
    await canceller.async_init()

    gce_event_monitor = GCEEventMonitor(app, MACHINE_NAME_PREFIX)
    app['gce_event_monitor'] = gce_event_monitor
    await gce_event_monitor.async_init()

    app['check_incremental_error'] = None
    app['check_resource_aggregation_error'] = None

    # NOTE(review): indentation was reconstructed from whitespace-collapsed
    # source; only the two invariant checks are assumed to be guarded by this
    # flag -- confirm against the original file.
    if HAIL_SHOULD_CHECK_INVARIANTS:
        app['task_manager'].ensure_future(
            periodically_call(10, check_incremental, app, db))
        app['task_manager'].ensure_future(
            periodically_call(10, check_resource_aggregation, app, db))

    # First argument of periodically_call is presumably the period in
    # seconds -- TODO confirm.
    app['task_manager'].ensure_future(
        periodically_call(10, monitor_billing_limits, app))

    app['task_manager'].ensure_future(
        periodically_call(10, cancel_fast_failing_batches, app))

    app['task_manager'].ensure_future(
        periodically_call(60, scheduling_cancelling_bump, app))
async def on_startup(app):
    """Build every shared service, store it in ``app``, and start the periodic tasks.

    The order matters: ``get_cloud_driver`` and ``Canceller.create``
    receive ``app`` and may read keys stored by the earlier steps.
    """
    # The same task manager is stored in app, passed to the cloud driver,
    # and used for the periodic tasks at the end.
    task_manager = aiotools.BackgroundTaskManager()
    app['task_manager'] = task_manager

    app['client_session'] = httpx.client_session()

    kubernetes_asyncio.config.load_incluster_config()
    app['k8s_client'] = kubernetes_asyncio.client.CoreV1Api()
    app['k8s_cache'] = K8sCache(app['k8s_client'])

    db = Database()
    await db.async_init(maxsize=50)
    app['db'] = db

    # Instance identity, internal token and the frozen flag come from the
    # `globals` table.
    row = await db.select_and_fetchone(''' SELECT instance_id, internal_token, frozen FROM globals; ''')

    instance_id = row['instance_id']
    log.info(f'instance_id {instance_id}')
    app['instance_id'] = instance_id

    app['internal_token'] = row['internal_token']
    app['batch_headers'] = {'Authorization': f'Bearer {row["internal_token"]}'}

    app['frozen'] = row['frozen']

    scheduler_state_changed = Notice()
    app['scheduler_state_changed'] = scheduler_state_changed

    # One event per cancellation state (ready, creating, running).
    cancel_ready_state_changed = asyncio.Event()
    app['cancel_ready_state_changed'] = cancel_ready_state_changed

    cancel_creating_state_changed = asyncio.Event()
    app['cancel_creating_state_changed'] = cancel_creating_state_changed

    cancel_running_state_changed = asyncio.Event()
    app['cancel_running_state_changed'] = cancel_running_state_changed

    async_worker_pool = AsyncWorkerPool(100, queue_size=100)
    app['async_worker_pool'] = async_worker_pool

    # One credentials file is used for both the file store and the driver.
    credentials_file = '/gsa-key/key.json'
    fs = get_cloud_async_fs(credentials_file=credentials_file)
    app['file_store'] = FileStore(fs, BATCH_STORAGE_URI, instance_id)

    # The configs object is not stored in app; it is only passed to the driver.
    inst_coll_configs = await InstanceCollectionConfigs.create(db)

    app['driver'] = await get_cloud_driver(app, db, MACHINE_NAME_PREFIX, DEFAULT_NAMESPACE, inst_coll_configs,
                                           credentials_file, task_manager)

    canceller = await Canceller.create(app)
    app['canceller'] = canceller

    app['check_incremental_error'] = None
    app['check_resource_aggregation_error'] = None

    # NOTE(review): indentation was reconstructed from whitespace-collapsed
    # source; only the two invariant checks are assumed to be guarded by this
    # flag -- confirm against the original file.
    if HAIL_SHOULD_CHECK_INVARIANTS:
        task_manager.ensure_future(
            periodically_call(10, check_incremental, app, db))
        task_manager.ensure_future(
            periodically_call(10, check_resource_aggregation, app, db))

    # First argument of periodically_call is presumably the period in
    # seconds -- TODO confirm.
    task_manager.ensure_future(
        periodically_call(10, monitor_billing_limits, app))
    task_manager.ensure_future(
        periodically_call(10, cancel_fast_failing_batches, app))
    task_manager.ensure_future(
        periodically_call(60, scheduling_cancelling_bump, app))
    task_manager.ensure_future(periodically_call(15, monitor_system, app))