def test_restart_on_sudden_instance_termination(
        training_finished, launch_train, spot_terminated, caplog):
    """Check that a worker goes to 'retry' when its instance is unreachable.

    The worker is launched with a successful ``launch_train`` (returns 0)
    while both ``training_finished`` and ``spot_terminated`` report False.
    Afterwards ``training_finished`` is switched to raise
    ``subprocess.CalledProcessError`` and the worker status is read again.

    NOTE(review): ``training_finished``, ``launch_train`` and
    ``spot_terminated`` look like mocks injected by patch decorators that
    are not visible here, and ``caplog`` like pytest's log-capture fixture
    -- confirm against the decorators above this function.
    """
    # training starts without error
    launch_train.return_value = 0

    # stand-in for the AWS instance; only an ``id`` attribute is provided
    class DummyInstance:
        id = 1

    # build the AWS worker from the 'worker' section of the config template
    template = ramp_aws_config_template()
    worker_config = read_config(template)['worker']
    worker = AWSWorker(worker_config, submission='starting_kit_local')
    worker.config = worker_config
    worker.submission = 'dummy submissions'
    worker.instance = DummyInstance

    # the spot instance is still alive and training has not finished yet
    spot_terminated.return_value = False
    training_finished.return_value = False

    worker.launch_submission()
    assert worker.status == 'running'
    # nothing should have been logged up to this point
    assert caplog.text == ''

    # from now on, checking whether training finished raises
    # CalledProcessError
    connection_error = subprocess.CalledProcessError(255, 'test')
    training_finished.side_effect = connection_error

    # reading the status must now yield 'retry' and log both messages
    assert worker.status == 'retry'
    assert 'Unable to connect to the instance' in caplog.text
    assert 'Adding the submission back to the queue' in caplog.text
def test_aws_worker_launch_train_error(launch_train, caplog):
    """Check that a failing ``launch_train`` puts the worker in 'error'.

    ``launch_train`` is set to raise ``subprocess.CalledProcessError``.
    The test then expects ``launch_submission`` to return 1, the worker
    status to be 'error', and the log to contain both the command name
    ('test') and 'Cannot start training of submission'.

    NOTE(review): ``launch_train`` looks like a mock injected by a patch
    decorator that is not visible here, and ``caplog`` like pytest's
    log-capture fixture -- confirm against the decorators above this
    function.
    """
    # stand-in for the AWS instance; only an ``id`` attribute is provided
    class DummyInstance:
        id = 1

    # every attempt to launch the training raises CalledProcessError
    launch_failure = subprocess.CalledProcessError(255, 'test')
    launch_train.side_effect = launch_failure

    # build the AWS worker from the 'worker' section of the config template
    template = ramp_aws_config_template()
    worker_config = read_config(template)['worker']
    worker = AWSWorker(worker_config, submission='starting_kit_local')
    worker.config = worker_config
    worker.submission = 'dummy submissions'
    worker.instance = DummyInstance

    # the CalledProcessError is raised inside launch_submission; the test
    # expects a return code here rather than a propagated exception
    exit_code = worker.launch_submission()

    logged = caplog.text
    assert 'test' in logged
    assert 'Cannot start training of submission' in logged
    assert worker.status == 'error'
    assert exit_code == 1