def setup_module(module):
    """Module-level pytest setup.

    Waits for Metronome and Cosmos to come up, verifies the cluster has at
    least two private agents (these tests need room to schedule job runs),
    and removes any leftover jobs from previous runs.
    """
    common.wait_for_metronome()
    common.wait_for_cosmos()
    agents = shakedown.get_private_agents()
    # Direct assert with the observed count instead of `if ...: assert False`
    # with an uninformative message — matches the other setup_module variant.
    assert len(agents) >= 2, \
        "Incorrect Agent count. Expecting at least 2 agents, but have {}".format(len(agents))
    remove_jobs()
def test_disable_schedule_recovery_from_master_bounce():
    """Verify that an enabled schedule fires, and that after the schedule is
    disabled no further runs are started — even across a master bounce.
    """
    client = metronome.create_client()
    job_id = 'schedule-disabled-{}'.format(uuid.uuid4().hex)
    with job(job_no_schedule(job_id)):
        every_minute = schedule()
        every_minute['cron'] = '* * * * *'
        client.add_schedule(job_id, every_minute)

        # Give the schedule time to fire at least once.
        time.sleep(timedelta(minutes=1.1).total_seconds())
        baseline_count = len(client.get_runs(job_id))
        # Race: by now there could be 1 or 2 runs — either is fine.  What
        # matters is that the count does not grow once the schedule is off.
        assert baseline_count > 0

        # Disable the schedule.
        every_minute['enabled'] = False
        client.update_schedule(job_id, 'nightly', every_minute)

        # Bounce the master and wait for Metronome to recover.
        shakedown.restart_master_node()
        common.wait_for_metronome()

        # Wait past the point where the next run would have fired.
        time.sleep(timedelta(minutes=1.5).total_seconds())
        # No runs beyond the ones observed before disabling.
        assert len(client.get_runs(job_id)) == baseline_count
def setup_module(module):
    """Module-level pytest setup.

    Waits for Metronome and Cosmos, logs cluster info, verifies that at
    least two private agents are available, and removes leftover jobs.
    """
    common.wait_for_metronome()
    common.wait_for_cosmos()
    common.cluster_info()
    agents = shakedown.get_private_agents()
    # Direct assert instead of the `if ...: assert False` anti-pattern;
    # message text unchanged.
    assert len(agents) >= 2, \
        f"Incorrect Agent count. Expecting at least 2 agents, but have {len(agents)}"
    remove_jobs()
def test_metronome_shutdown_with_no_extra_tasks():
    """Regression test for METRONOME-100.

    A Metronome restart must not start a second task for a job run that is
    already active.
    """
    client = metronome.create_client()
    job_id = "metronome-shutdown-{}".format(uuid.uuid4().hex)
    with job(job_no_schedule(job_id)):
        # Kick off a run and wait until it is actually started.
        run_id = client.run_job(job_id)["id"]
        common.wait_for_job_started(job_id, run_id)
        common.assert_job_run(client, job_id)

        # Restart the Metronome process on its leader.
        common.run_command_on_metronome_leader('sudo systemctl restart dcos-metronome')
        common.wait_for_metronome()

        # The restart must not have spawned any additional task.
        common.assert_wait_for_no_additional_tasks(tasks_count=1, client=client, job_id=job_id)
def test_metronome_shutdown_with_no_extra_tasks():
    """Regression test for METRONOME-100.

    When Metronome is restarted it must not incorrectly start another task
    for an already-running job run.
    """
    m_client = metronome.create_client()
    job_id = "metronome-shutdown-{}".format(uuid.uuid4().hex)
    with job(job_no_schedule(job_id)):
        # Start a run before Metronome goes down and wait for it to begin.
        started_run = m_client.run_job(job_id)["id"]
        common.wait_for_job_started(job_id, started_run)
        common.assert_job_run(m_client, job_id)

        # Bounce the Metronome service on the leader node.
        common.run_command_on_metronome_leader(
            'sudo systemctl restart dcos-metronome')
        common.wait_for_metronome()

        # Exactly one task should still exist — no extras after the restart.
        common.assert_wait_for_no_additional_tasks(tasks_count=1, client=m_client, job_id=job_id)
def test_metronome_shutdown_with_no_extra_tasks():
    """Regression test for METRONOME-100.

    Restarting Metronome must not start an extra task for a job run task
    that is already running.
    """
    client = metronome.create_client()
    job_id = "metronome-shutdown-{}".format(uuid.uuid4().hex)
    with job(job_no_schedule(job_id)):
        # Launch a run before the shutdown and wait until it has started.
        active_run = client.run_job(job_id)["id"]
        common.wait_for_job_started(job_id, active_run)
        common.assert_job_run(client, job_id)

        # Restart the Metronome process.
        # NOTE: this won't work in a multi-master setup if the mesos leader
        # is not also the metronome leader; improve once there is a good way
        # to get the metronome leader from the system (e.g. info endpoint).
        leader_ip = shakedown.master_leader_ip()
        shakedown.run_command_on_agent(leader_ip, 'sudo systemctl restart dcos-metronome')
        common.wait_for_metronome()

        # Confirm no extra job-run tasks were started by the restart.
        common.assert_wait_for_no_additional_tasks(tasks_count=1, client=client, job_id=job_id)
def test_disable_schedule_recovery_from_master_bounce():
    """Verify an enabled schedule fires, then stops firing after being
    disabled — and stays disabled across a mesos master bounce.
    """
    client = metronome.create_client()
    job_id = 'schedule-disabled-{}'.format(uuid.uuid4().hex)
    with job(job_no_schedule(job_id)):
        minutely = schedule()
        minutely['cron'] = '* * * * *'
        client.add_schedule(job_id, minutely)

        # Sleep until the schedule has had a chance to fire.
        time.sleep(timedelta(minutes=1.1).total_seconds())
        initial_count = len(client.get_runs(job_id))
        # Race condition: 1 or 2 runs are both acceptable here; the real
        # check is that the count is frozen after the schedule is disabled.
        assert initial_count > 0

        # Turn the schedule off.
        minutely['enabled'] = False
        client.update_schedule(job_id, 'nightly', minutely)

        # Bounce the mesos master and wait for the stack to recover.
        shakedown.run_command_on_leader('sudo systemctl restart dcos-mesos-master')
        common.wait_for_cosmos()
        common.wait_for_metronome()

        # Wait past the next would-be firing time.
        time.sleep(timedelta(minutes=1.5).total_seconds())
        # Run count must not have grown beyond the pre-disable baseline.
        assert len(client.get_runs(job_id)) == initial_count