def admin_context():
    """Run the wrapped block under an admin context.

    Installs an admin context as the thread-local context for the
    duration of the ``yield`` and always clears it afterwards, even if
    the wrapped block raises.
    """
    admin_ctx = context.get_admin_context()
    context.set_ctx(admin_ctx)
    try:
        yield
    finally:
        # Never leave the elevated context installed.
        context.set_ctx(None)
def terminate_unneeded_clusters(self, ctx):
    """Terminate Active transient clusters that have been idle long enough.

    Skips clusters that are not transient, still have unfinished job
    executions, or were updated less than
    ``CONF.min_transient_cluster_active_time`` seconds ago.  With
    identity API v3 the cluster is terminated directly under its trust
    token; otherwise it is only marked 'AwaitingTermination'.

    :param ctx: incoming context; replaced by an admin context.
    """
    LOG.debug('Terminating unneeded clusters')
    ctx = context.get_admin_context()
    context.set_ctx(ctx)
    for cluster in conductor.cluster_get_all(ctx, status='Active'):
        if not cluster.is_transient:
            continue

        # A job execution without an end_time is still running.
        jc = conductor.job_execution_count(ctx,
                                           end_time=None,
                                           cluster_id=cluster.id)
        if jc > 0:
            continue

        # Seconds since the cluster record was last updated.
        cluster_updated_at = timeutils.normalize_time(
            timeutils.parse_isotime(cluster.updated_at))
        current_time = timeutils.utcnow()
        spacing = timeutils.delta_seconds(cluster_updated_at, current_time)
        if spacing < CONF.min_transient_cluster_active_time:
            continue

        if CONF.use_identity_api_v3:
            # Authenticate with the cluster's trust before terminating.
            trusts.use_os_admin_auth_token(cluster)
            api.terminate_cluster(cluster.id)
            # Fix: pass lazy %-args to the logger instead of eagerly
            # formatting with '%' — formatting is skipped entirely when
            # debug logging is disabled.
            LOG.debug('Terminated cluster %s with id %s',
                      cluster.name, cluster.id)
        else:
            if cluster.status != 'AwaitingTermination':
                conductor.cluster_update(
                    ctx,
                    cluster,
                    {'status': 'AwaitingTermination'})
    context.set_ctx(None)
def check_for_zombie_proxy_users(self, ctx):
    """Delete proxy-domain users whose job execution is gone or finished.

    Proxy users are named ``job_<job_execution_id>``; a user is a
    "zombie" when its job execution no longer exists or has reached a
    terminal status.
    """
    admin_ctx = context.get_admin_context()
    context.set_ctx(admin_ctx)
    for user in p.proxy_domain_users_list():
        if not user.name.startswith('job_'):
            continue
        # Strip the 'job_' prefix to recover the job execution id.
        je_id = user.name[4:]
        je = conductor.job_execution_get(admin_ctx, je_id)
        is_zombie = (je is None or
                     je.info['status'] in edp.JOB_STATUSES_TERMINATED)
        if is_zombie:
            LOG.debug('Found zombie proxy user {username}'.format(
                username=user.name))
            p.proxy_user_delete(user_id=user.id)
    context.set_ctx(None)
def terminate_unneeded_clusters(self, ctx):
    """Terminate Active transient clusters that are idle long enough.

    Skips non-transient clusters, clusters with unfinished job
    executions, and clusters updated less than
    ``CONF.min_transient_cluster_active_time`` seconds ago.  With
    identity API v3 the cluster is terminated via ``ops`` (best-effort,
    failures are logged and skipped); otherwise it is only flagged
    'AwaitingTermination'.

    :param ctx: incoming context; replaced by an admin context below.
    """
    LOG.debug('Terminating unneeded transient clusters')
    # Work under an admin context so all tenants' clusters are visible.
    ctx = context.get_admin_context()
    context.set_ctx(ctx)
    for cluster in conductor.cluster_get_all(ctx, status='Active'):
        if not cluster.is_transient:
            continue

        # A job execution with end_time=None is still running; keep the
        # cluster alive while any job is in flight.
        jc = conductor.job_execution_count(ctx,
                                           end_time=None,
                                           cluster_id=cluster.id)

        if jc > 0:
            continue

        # Seconds elapsed since the cluster record was last updated.
        cluster_updated_at = timeutils.normalize_time(
            timeutils.parse_isotime(cluster.updated_at))
        current_time = timeutils.utcnow()
        spacing = timeutils.delta_seconds(cluster_updated_at, current_time)
        if spacing < CONF.min_transient_cluster_active_time:
            continue

        if CONF.use_identity_api_v3:
            # Switch to the cluster's trust token before terminating on
            # the owner's behalf.
            trusts.use_os_admin_auth_token(cluster)

            LOG.info(
                _LI('Terminating transient cluster %(cluster)s '
                    'with id %(id)s'),
                {'cluster': cluster.name, 'id': cluster.id})

            try:
                ops.terminate_cluster(cluster.id)
            except Exception as e:
                # Best-effort: log the failure and continue with the
                # remaining clusters rather than aborting the task.
                LOG.info(
                    _LI('Failed to terminate transient cluster '
                        '%(cluster)s with id %(id)s: %(error)s.'),
                    {'cluster': cluster.name,
                     'id': cluster.id,
                     'error': six.text_type(e)})
        else:
            # Without v3 trusts we cannot act for the user; just flag
            # the cluster for termination.
            if cluster.status != 'AwaitingTermination':
                conductor.cluster_update(
                    ctx,
                    cluster,
                    {'status': 'AwaitingTermination'})
    context.set_ctx(None)
def terminate_unneeded_clusters(self, ctx):
    """Terminate idle Active transient clusters.

    A transient cluster is terminated (or, without identity API v3,
    flagged 'AwaitingTermination') once it has no running job
    executions and has been idle at least
    ``CONF.min_transient_cluster_active_time`` seconds.
    """
    LOG.debug('Terminating unneeded transient clusters')
    admin_ctx = context.get_admin_context()
    context.set_ctx(admin_ctx)
    for cluster in conductor.cluster_get_all(admin_ctx, status='Active'):
        if not cluster.is_transient:
            continue
        # Executions with no end_time are still running.
        running_jobs = conductor.job_execution_count(
            admin_ctx, end_time=None, cluster_id=cluster.id)
        if running_jobs > 0:
            continue
        last_update = timeutils.normalize_time(
            timeutils.parse_isotime(cluster.updated_at))
        idle_seconds = timeutils.delta_seconds(last_update,
                                               timeutils.utcnow())
        if idle_seconds < CONF.min_transient_cluster_active_time:
            continue
        if not CONF.use_identity_api_v3:
            # Cannot act on the user's behalf; only flag the cluster.
            if cluster.status != 'AwaitingTermination':
                conductor.cluster_update(
                    admin_ctx, cluster,
                    {'status': 'AwaitingTermination'})
            continue
        # Authenticate with the cluster's trust, then terminate.
        trusts.use_os_admin_auth_token(cluster)
        LOG.info(_LI('Terminating transient cluster %(cluster)s '
                     'with id %(id)s'),
                 {'cluster': cluster.name, 'id': cluster.id})
        try:
            ops.terminate_cluster(cluster.id)
        except Exception as e:
            # Best-effort: log and move on to the next cluster.
            LOG.info(_LI('Failed to terminate transient cluster '
                         '%(cluster)s with id %(id)s: %(error)s.'),
                     {'cluster': cluster.name, 'id': cluster.id,
                      'error': six.text_type(e)})
    context.set_ctx(None)
def terminate_unneeded_transient_clusters(self, ctx):
    """Terminate Active transient clusters with no running jobs that
    have been idle at least CONF.min_transient_cluster_active_time
    seconds.
    """
    LOG.debug('Terminating unneeded transient clusters')
    admin_ctx = context.get_admin_context()
    context.set_ctx(admin_ctx)
    for cluster in conductor.cluster_get_all(admin_ctx, status='Active'):
        if not cluster.is_transient:
            continue
        # Executions with no end_time are still running.
        unfinished = conductor.job_execution_count(
            admin_ctx, end_time=None, cluster_id=cluster.id)
        if unfinished > 0:
            continue
        idle_for = get_time_since_last_update(cluster)
        if idle_for >= CONF.min_transient_cluster_active_time:
            terminate_cluster(admin_ctx, cluster, description='transient')
    context.set_ctx(None)
def terminate_incomplete_clusters(self, ctx):
    """Terminate clusters stuck in a non-final state for too long.

    Disabled when CONF.cleanup_time_for_incomplete_clusters is not
    positive.  Clusters in 'Active', 'Error' or 'Deleting' are left
    alone.
    """
    if CONF.cleanup_time_for_incomplete_clusters <= 0:
        return
    LOG.debug('Terminating old clusters in non-final state')
    admin_ctx = context.get_admin_context()
    context.set_ctx(admin_ctx)
    # NOTE(alazarev) Retrieving all clusters once in hour for now.
    # Criteria support need to be implemented in sahara db API to
    # have SQL filtering.
    max_idle = CONF.cleanup_time_for_incomplete_clusters * 3600
    for cluster in conductor.cluster_get_all(admin_ctx):
        if cluster.status in ['Active', 'Error', 'Deleting']:
            continue
        if get_time_since_last_update(cluster) >= max_idle:
            terminate_cluster(admin_ctx, cluster,
                              description='incomplete')
    context.set_ctx(None)
def terminate_incomplete_clusters(self, ctx):
    """Terminate clusters stuck in a non-final state for too long.

    Disabled when CONF.cleanup_time_for_incomplete_clusters is not
    positive.  'Active', 'Error' and 'Deleting' clusters are skipped.
    After each termination the per-thread instance info is reset.
    """
    if CONF.cleanup_time_for_incomplete_clusters <= 0:
        return
    LOG.debug('Terminating old clusters in non-final state')
    admin_ctx = context.get_admin_context()
    context.set_ctx(admin_ctx)
    # NOTE(alazarev) Retrieving all clusters once in hour for now.
    # Criteria support need to be implemented in sahara db API to
    # have SQL filtering.
    max_idle = CONF.cleanup_time_for_incomplete_clusters * 3600
    for cluster in conductor.cluster_get_all(admin_ctx):
        if cluster.status in ['Active', 'Error', 'Deleting']:
            continue
        if get_time_since_last_update(cluster) < max_idle:
            continue
        terminate_cluster(admin_ctx, cluster, description='incomplete')
        # Add event log info cleanup
        context.ctx().current_instance_info = context.InstanceInfo()
    context.set_ctx(None)
def terminate_unneeded_transient_clusters(self, ctx):
    """Terminate idle Active transient clusters.

    A cluster qualifies once it has no running job executions and has
    been idle at least CONF.min_transient_cluster_active_time seconds.
    After each termination the per-thread instance info is reset.
    """
    LOG.debug('Terminating unneeded transient clusters')
    admin_ctx = context.get_admin_context()
    context.set_ctx(admin_ctx)
    for cluster in conductor.cluster_get_all(admin_ctx, status='Active'):
        if not cluster.is_transient:
            continue
        # Executions with no end_time are still running.
        unfinished = conductor.job_execution_count(
            admin_ctx, end_time=None, cluster_id=cluster.id)
        if unfinished > 0:
            continue
        if get_time_since_last_update(cluster) < (
                CONF.min_transient_cluster_active_time):
            continue
        terminate_cluster(admin_ctx, cluster, description='transient')
        # Add event log info cleanup
        context.ctx().current_instance_info = context.InstanceInfo()
    context.set_ctx(None)
def update_job_statuses(self, ctx):
    """Refresh all job execution statuses under an admin context.

    :param ctx: incoming context; replaced by an admin context.
    """
    LOG.debug('Updating job statuses')
    ctx = context.get_admin_context()
    context.set_ctx(ctx)
    try:
        job_manager.update_job_statuses()
    finally:
        # Fix: always clear the thread-local admin context, even when
        # the status update raises — otherwise later code on this
        # thread would silently run with admin rights.
        context.set_ctx(None)
def handler(self, ctx):
    """Invoke the wrapped periodic task under an admin context.

    :param ctx: incoming context; an admin context is installed and
        passed to the wrapped function instead.
    """
    ctx = context.get_admin_context()
    context.set_ctx(ctx)
    try:
        func(self, ctx)
    finally:
        # Fix: clear the thread-local admin context even when the
        # wrapped task raises, so the elevated context never leaks to
        # subsequent work on this thread.
        context.set_ctx(None)
def get_cluster(id, all_tenants=None):
    """Fetch a cluster by id.

    When *all_tenants* is truthy and the caller holds the 'admin'
    role, the lookup is performed under an admin context so clusters
    from any tenant are visible; otherwise only the caller's own
    tenant is searched.
    """
    lookup_ctx = context.ctx()
    if all_tenants and 'admin' in lookup_ctx.roles:
        # Elevate only for the lookup itself; the thread-local context
        # is left untouched.
        lookup_ctx = context.get_admin_context()
    return conductor.cluster_get(lookup_ctx, id)