def test_job_count_quota(self):
    """Check that a user's job-count quota gates job submission.

    Steps, each asserting the HTTP status code that comes back:
      1. An admin sets the user's count quota to 0; the user's submission
         is rejected (422).
      2. An admin resets the quota; the user's submission is accepted (201).
      3. An admin tries to set a negative count quota; the request is
         rejected (400).

    The finally block kills any submitted jobs and resets the user's quota.
    """
    admin = self.user_factory.admin()
    user = self.user_factory.new_user()
    all_job_uuids = []
    try:
        # User with no quota can't submit jobs
        with admin:
            resp = util.set_limit(self.cook_url, 'quota', user.name, count=0)
            self.assertEqual(resp.status_code, 201, resp.text)
        with user:
            _, resp = util.submit_job(self.cook_url)
            self.assertEqual(resp.status_code, 422, msg=resp.text)
        # Reset user's quota back to default, then user can submit jobs again
        with admin:
            resp = util.reset_limit(self.cook_url, 'quota', user.name, reason=self.current_name())
            self.assertEqual(resp.status_code, 204, resp.text)
        with user:
            job_uuid, resp = util.submit_job(self.cook_url)
            self.assertEqual(resp.status_code, 201, msg=resp.text)
            all_job_uuids.append(job_uuid)
        # Can't set negative quota
        with admin:
            resp = util.set_limit(self.cook_url, 'quota', user.name, count=-1)
            self.assertEqual(resp.status_code, 400, resp.text)
    finally:
        with admin:
            # Cleanup mirrors the cpu-quota test in this file. assert_response=False
            # presumably keeps kill_jobs from asserting on its own response (confirm
            # in util), so a failed kill -- e.g. when no job was ever submitted --
            # neither hides the test's real failure nor skips the quota reset below.
            util.kill_jobs(self.cook_url, all_job_uuids, assert_response=False)
            util.reset_limit(self.cook_url, 'quota', user.name, reason=self.current_name())
def test_job_cpu_quota(self):
    """Check that a user's cpu quota gates job submission.

    Walks through a zero cpu quota, a 0.25-cpu quota, the quota after a
    reset, and finally a negative quota, asserting the HTTP status code
    returned at every step. Submitted jobs are killed and the quota is
    reset in the finally block.
    """
    admin_user = self.user_factory.admin()
    test_user = self.user_factory.new_user()
    submitted_uuids = []

    def set_cpu_quota(cpus, expected_status):
        # Runs as admin: set the test user's cpu quota and check the status code
        with admin_user:
            response = util.set_limit(self.cook_url, 'quota', test_user.name, cpus=cpus)
            self.assertEqual(response.status_code, expected_status, response.text)

    def submit(expected_status, **job_spec):
        # Caller is responsible for being inside the `with test_user:` context
        uuid, response = util.submit_job(self.cook_url, **job_spec)
        self.assertEqual(response.status_code, expected_status, msg=response.text)
        return uuid

    try:
        # A zero quota blocks every submission
        set_cpu_quota(0, 201)
        with test_user:
            submit(422)

        # A tiny quota blocks bigger jobs but admits jobs that fit
        set_cpu_quota(0.25, 201)
        with test_user:
            submit(422, cpus=0.5)
            submitted_uuids.append(submit(201, cpus=0.25))

        # After the quota is reset, submissions are accepted again
        with admin_user:
            response = util.reset_limit(self.cook_url, 'quota', test_user.name, reason=self.current_name())
            self.assertEqual(response.status_code, 204, response.text)
        with test_user:
            submitted_uuids.append(submit(201))

        # A negative quota is rejected
        set_cpu_quota(-4, 400)
    finally:
        with admin_user:
            util.kill_jobs(self.cook_url, submitted_uuids, assert_response=False)
            util.reset_limit(self.cook_url, 'quota', test_user.name, reason=self.current_name())
def test_cannot_impersonate_admin_endpoints(self):
    """Check that impersonating an admin does not grant access to admin endpoints.

    The same sequence of requests (read the queue, then set and reset the
    user's quota, then set and reset the user's share) is issued twice:
    first as the admin, where each request must succeed, and then as the
    poser impersonating the admin, where each request must return 403.
    """
    target_user = self.user_factory.new_user()
    # (limit type, cpus value to set), in the order the requests are issued
    limit_steps = (('quota', 20), ('share', 10))

    # admin can do admin things
    with self.admin:
        # read queue endpoint
        response = util.query_queue(self.cook_url)
        self.assertEqual(response.status_code, 200, response.text)
        for limit_type, cpus in limit_steps:
            # set the user's limit
            response = util.set_limit(self.cook_url, limit_type, target_user.name, cpus=cpus)
            self.assertEqual(response.status_code, 201, response.text)
            # reset the user's limit back to default
            response = util.reset_limit(self.cook_url, limit_type, target_user.name, reason=self.current_name())
            self.assertEqual(response.status_code, 204, response.text)

    # impersonator cannot indirectly do admin things
    with self.poser.impersonating(self.admin):
        # read queue endpoint
        response = util.query_queue(self.cook_url)
        self.assertEqual(response.status_code, 403, response.text)
        for limit_type, cpus in limit_steps:
            # set the user's limit
            response = util.set_limit(self.cook_url, limit_type, target_user.name, cpus=cpus)
            self.assertEqual(response.status_code, 403, response.text)
            # reset the user's limit back to default
            response = util.reset_limit(self.cook_url, limit_type, target_user.name, reason=self.current_name())
            self.assertEqual(response.status_code, 403, response.text)
def test_pool_scheduling(self):
    """Check per-pool quota enforcement and the reported unscheduled reasons.

    For every active pool the user's cpu quota is lowered (0.1 cpus on the
    default pool, 0.2 on the others). Then, per pool, one job that uses the
    whole quota is submitted and must start on a node of that pool, and a
    second job is submitted that must stay waiting with both the
    spot-in-queue and the exceeding-quota reasons reported for it.
    """
    admin = self.user_factory.admin()
    user = self.user_factory.new_user()
    pools, _ = util.active_pools(self.cook_url)
    submitted_uuids = []
    try:
        default_pool_name = util.default_pool(self.cook_url)
        # The test needs at least two pools and a default pool
        self.assertLess(1, len(pools))
        self.assertIsNotNone(default_pool_name)

        cpus = 0.1

        with admin:
            self.logger.info(f'Running tasks: {json.dumps(util.running_tasks(self.cook_url), indent=2)}')
            for pool in pools:
                # Lower the user's cpu quota on this pool
                name = pool['name']
                if name == default_pool_name:
                    multiplier = 1
                else:
                    multiplier = 2
                util.set_limit(self.cook_url, 'quota', user.name, cpus=cpus * multiplier, pool=name)

        with user:
            util.kill_running_and_waiting_jobs(self.cook_url, user.name)
            for pool in pools:
                name = pool['name']

                # Submit a job that fills the user's quota on this pool
                limit_response = util.get_limit(self.cook_url, 'quota', user.name, name)
                quota_cpus = limit_response.json()['cpus']
                filler_uuid, _ = util.submit_job(self.cook_url,
                                                 cpus=quota_cpus,
                                                 command='sleep 600',
                                                 pool=name)
                submitted_uuids.append(filler_uuid)
                filler_instance = util.wait_for_running_instance(self.cook_url, filler_uuid)
                self.assertEqual(name, util.node_pool(filler_instance['hostname']))

                # Submit a job that should not get scheduled
                blocked_uuid, _ = util.submit_job(self.cook_url, cpus=cpus, command='ls', pool=name)
                submitted_uuids.append(blocked_uuid)
                self.assertEqual('waiting', util.load_job(self.cook_url, blocked_uuid)['status'])

                # Assert that the unscheduled reason and data are correct.
                # The function is redefined and called once per pool, so it
                # always sees the current iteration's uuids and quota.
                @retry(stop_max_delay=60000, wait_fixed=5000)
                def check_unscheduled_reason():
                    jobs, _ = util.unscheduled_jobs(self.cook_url, blocked_uuid)
                    self.logger.info(f'Unscheduled jobs: {jobs}')
                    first_job = jobs[0]
                    self.assertEqual(blocked_uuid, first_job['uuid'])
                    reported = first_job['reasons']

                    def find_reason(text):
                        # First reported reason whose text matches exactly
                        return next(r for r in reported if r['reason'] == text)

                    # Check the spot-in-queue reason
                    queue_reason = find_reason('You have 1 other jobs ahead in the '
                                               'queue.')
                    self.assertEqual({'jobs': [filler_uuid]}, queue_reason['data'])

                    # Check the exceeding-quota reason
                    quota_reason = find_reason(reasons.JOB_WOULD_EXCEED_QUOTA)
                    expected_quota_data = {
                        'cpus': {
                            'limit': quota_cpus,
                            'usage': quota_cpus + cpus
                        }
                    }
                    self.assertEqual(expected_quota_data, quota_reason['data'])

                check_unscheduled_reason()
    finally:
        with admin:
            util.kill_jobs(self.cook_url, submitted_uuids, assert_response=False)
            for pool in pools:
                util.reset_limit(self.cook_url, 'quota', user.name,
                                 reason=self.current_name(), pool=pool['name'])
def trigger_preemption(self, pool):
    """
    Provokes a preemption on the given pool (None is accepted) and waits for it.

    Procedure:

    1. Create a fresh user, X
    2. As admin, set X's cpu share to 0.1 and cpu quota to 1.0
    3. As X, submit job J1 with 1.0 cpu at priority 99, filling the cpu quota
    4. Wait for J1 to have a running instance
    5. As X, submit job J2 with 0.1 cpu at priority 100
    6. Poll J1 until one of its instances reports 'Preempted by rebalancer'

    All submitted jobs are killed and X's share and quota are reset afterwards.
    """
    admin = self.user_factory.admin()
    user = self.user_factory.new_user()
    job_uuids = []

    def low_priority_job():
        # Poll target: reload the large job, logging the user's running and
        # waiting jobs from the past hour along the way
        job = util.load_job(self.cook_url, uuid_large)
        one_hour_in_millis = 60 * 60 * 1000
        start = util.current_milli_time() - one_hour_in_millis
        end = util.current_milli_time()
        running = util.jobs(self.cook_url, user=user.name, state='running', start=start, end=end).json()
        waiting = util.jobs(self.cook_url, user=user.name, state='waiting', start=start, end=end).json()
        self.logger.info(f'Currently running jobs: {json.dumps(running, indent=2)}')
        self.logger.info(f'Currently waiting jobs: {json.dumps(waiting, indent=2)}')
        return job

    def job_was_preempted(job):
        # True as soon as any instance carries the rebalancer's preemption reason
        for instance in job['instances']:
            self.logger.debug(f'Checking if instance was preempted: {instance}')
            if instance.get('reason_string') == 'Preempted by rebalancer':
                break
        else:
            self.logger.info(f'Job has not been preempted: {job}')
            return False
        return True

    try:
        small_cpus = 0.1
        large_cpus = small_cpus * 10

        with admin:
            # Lower the user's cpu share and quota
            for limit_type, limit_cpus in (('share', small_cpus), ('quota', large_cpus)):
                util.set_limit(self.cook_url, limit_type, user.name, cpus=limit_cpus, pool=pool)

        with user:
            base_priority = 99
            command = 'sleep 600'

            # Submit a large job that fills up the user's quota
            uuid_large, _ = util.submit_job(self.cook_url,
                                            priority=base_priority,
                                            cpus=large_cpus,
                                            command=command,
                                            pool=pool)
            job_uuids.append(uuid_large)
            util.wait_for_running_instance(self.cook_url, uuid_large)

            # Submit a higher-priority job that should trigger preemption
            uuid_high_priority, _ = util.submit_job(self.cook_url,
                                                    priority=base_priority + 1,
                                                    cpus=small_cpus,
                                                    command=command,
                                                    name='higher_priority_job',
                                                    pool=pool)
            job_uuids.append(uuid_high_priority)

            # Assert that the lower-priority job was preempted, allowing
            # one and a half rebalancer intervals for it to happen
            rebalancer_settings = util.settings(self.cook_url)['rebalancer']
            max_wait_ms = rebalancer_settings['interval-seconds'] * 1000 * 1.5
            self.logger.info(f'Waiting up to {max_wait_ms} milliseconds for preemption to happen')
            util.wait_until(low_priority_job,
                            job_was_preempted,
                            max_wait_ms=max_wait_ms,
                            wait_interval_ms=5000)
    finally:
        with admin:
            util.kill_jobs(self.cook_url, job_uuids, assert_response=False)
            for limit_type in ('share', 'quota'):
                util.reset_limit(self.cook_url, limit_type, user.name,
                                 reason=self.current_name(), pool=pool)
def test_preemption(self):
    """Check that a higher-priority job preempts a running lower-priority job.

    The scenario (lower the user's cpu share and quota, fill the quota with
    a priority-99 job, submit a priority-100 job, wait for the first job to
    be preempted by the rebalancer) is implemented once in
    trigger_preemption. This test previously carried a line-for-line copy
    of that method with the pool argument left out; it now runs the shared
    implementation without naming a pool (pool=None, which
    trigger_preemption documents as accepted) so the two cannot drift apart.
    """
    self.trigger_preemption(pool=None)