def test_run_job_expired_job(
    mocker, one_hour_ahead, spark_job_with_run_factory, cluster_provisioner_mocks
):
    """An expired job (terminated run, future start_date) is unscheduled and expired."""
    # NOTE(review): this definition is shadowed by a later, behaviorally
    # identical `test_run_job_expired_job` in this module, so pytest never
    # collects this copy — the duplicate should be removed.
    # create a job that is not due to run, e.g. the start_date isn't in the past
    # but an hour in the future
    mocker.patch(
        "atmo.clusters.provisioners.ClusterProvisioner.info",
        return_value={
            "creation_datetime": timezone.now(),
            "ready_datetime": None,
            "end_datetime": None,
            "state": Cluster.STATUS_TERMINATED,
            "state_change_reason_code": None,
            "state_change_reason_message": None,
            "public_dns": "master.public.dns.name",
        },
    )
    spark_job_with_run = spark_job_with_run_factory(
        start_date=one_hour_ahead, run__status=Cluster.STATUS_TERMINATED
    )
    mocker.spy(tasks.run_job, "unschedule_and_expire")
    schedule_delete = mocker.patch("atmo.jobs.schedules.SparkJobSchedule.delete")
    expire = mocker.patch("atmo.jobs.models.SparkJob.expire")
    assert spark_job_with_run.has_finished
    assert schedule_delete.call_count == 0

    tasks.run_job(spark_job_with_run.pk)

    # Expiration must both delete the schedule and expire the model.
    assert tasks.run_job.unschedule_and_expire.call_count == 1
    assert schedule_delete.call_count == 1
    assert expire.call_count == 1
def test_run_job_dangling_job(
    mocker, now, one_hour_ago, one_hour_ahead, spark_job_with_run_factory
):
    """A still-waiting run inside its timeout window is retried, not terminated."""
    # NOTE(review): this definition is shadowed by a later, behaviorally
    # identical `test_run_job_dangling_job` in this module, so pytest never
    # collects this copy — the duplicate should be removed.
    # create a job with a run that started one hour ago and is allowed
    # to run for two hours, so it's not timing out, but it's not quite
    # healthy, too
    spark_job_with_run = spark_job_with_run_factory(
        start_date=one_hour_ahead,
        job_timeout=2,
        run__status=Cluster.STATUS_WAITING,
        run__scheduled_at=one_hour_ago,
    )
    mocker.spy(tasks.run_job, "terminate_and_notify")
    mocker.patch(
        "atmo.clusters.provisioners.ClusterProvisioner.info",
        return_value={
            "creation_datetime": now,
            "ready_datetime": None,
            "end_datetime": None,
            "state": Cluster.STATUS_WAITING,
            "public_dns": None,
        },
    )
    terminate = mocker.patch("atmo.jobs.models.SparkJob.terminate")
    assert not spark_job_with_run.has_finished
    assert not spark_job_with_run.has_timed_out
    assert terminate.call_count == 0

    # tries running again
    with pytest.raises(Retry):
        tasks.run_job(spark_job_with_run.pk)

    # Neither termination path may fire for a run that hasn't timed out.
    assert tasks.run_job.terminate_and_notify.call_count == 0
    assert terminate.call_count == 0
def test_run_job_not_enabled(mocker, spark_job_with_run_factory,
                             cluster_provisioner_mocks):
    """Running a disabled job raises SparkJobNotEnabled from check_enabled."""
    # NOTE(review): this definition is shadowed by an identical
    # `test_run_job_not_enabled` defined later in this module, so pytest never
    # collects this copy — the duplicate should be removed.
    spark_job_with_run = spark_job_with_run_factory(is_enabled=False)
    mocker.spy(tasks.run_job, 'check_enabled')
    with pytest.raises(exceptions.SparkJobNotEnabled):
        tasks.run_job(spark_job_with_run.pk)
    assert tasks.run_job.check_enabled.call_count == 1
def test_run_job_timed_out_job(mocker, now, one_hour_ahead, spark_job_with_run_factory):
    """A run past its timeout is terminated and the owner is notified."""
    # NOTE(review): this definition is shadowed by a later, behaviorally
    # identical `test_run_job_timed_out_job` in this module, so pytest never
    # collects this copy — the duplicate should be removed.
    # create a job with a run that started two hours ago but is only allowed
    # to run for an hour, so timing out
    spark_job_with_run = spark_job_with_run_factory(
        start_date=one_hour_ahead,
        job_timeout=1,
        run__status=Cluster.STATUS_WAITING,
        run__scheduled_at=now - timedelta(hours=2),
    )
    mocker.spy(tasks.run_job, "terminate_and_notify")
    mocker.patch(
        "atmo.clusters.provisioners.ClusterProvisioner.info",
        return_value={
            "creation_datetime": now,
            "ready_datetime": None,
            "end_datetime": None,
            "state": Cluster.STATUS_WAITING,
            "public_dns": None,
        },
    )
    terminate = mocker.patch("atmo.jobs.models.SparkJob.terminate")
    assert not spark_job_with_run.has_finished
    assert spark_job_with_run.has_timed_out
    assert terminate.call_count == 0

    tasks.run_job(spark_job_with_run.pk)
    # The timed-out path must call terminate exactly once.
    assert tasks.run_job.terminate_and_notify.call_count == 1
    assert terminate.call_count == 1
def test_run_job_not_enabled(
    mocker, spark_job_with_run_factory, cluster_provisioner_mocks
):
    """Running a disabled job raises SparkJobNotEnabled from check_enabled."""
    disabled_job = spark_job_with_run_factory(is_enabled=False)
    mocker.spy(tasks.run_job, "check_enabled")

    with pytest.raises(exceptions.SparkJobNotEnabled):
        tasks.run_job(disabled_job.pk)

    # The enabled-check guard ran exactly once before raising.
    assert tasks.run_job.check_enabled.call_count == 1
def test_run_job_expired_job(
    mocker, one_hour_ahead, spark_job_with_run_factory, cluster_provisioner_mocks
):
    """An expired job (terminated run, future start_date) is unscheduled and expired."""
    # The provisioner reports the cluster as already terminated.
    mocker.patch(
        "atmo.clusters.provisioners.ClusterProvisioner.info",
        return_value={
            "creation_datetime": timezone.now(),
            "ready_datetime": None,
            "end_datetime": None,
            "state": Cluster.STATUS_TERMINATED,
            "state_change_reason_code": None,
            "state_change_reason_message": None,
            "public_dns": "master.public.dns.name",
        },
    )
    # A job that is not due to run: its start_date lies an hour in the future.
    expired_job = spark_job_with_run_factory(
        start_date=one_hour_ahead, run__status=Cluster.STATUS_TERMINATED
    )
    mocker.spy(tasks.run_job, "unschedule_and_expire")
    delete_mock = mocker.patch("atmo.jobs.schedules.SparkJobSchedule.delete")
    expire_mock = mocker.patch("atmo.jobs.models.SparkJob.expire")

    assert expired_job.has_finished
    assert delete_mock.call_count == 0

    tasks.run_job(expired_job.pk)

    # Expiration must both delete the schedule and expire the model.
    assert tasks.run_job.unschedule_and_expire.call_count == 1
    assert delete_mock.call_count == 1
    assert expire_mock.call_count == 1
def test_run_job_dangling_job(
    mocker, now, one_hour_ago, one_hour_ahead, spark_job_with_run_factory
):
    """A still-waiting run inside its timeout window is retried, not terminated."""
    # The run was scheduled an hour ago with a two-hour timeout: not timed
    # out yet, but not healthy either — a "dangling" job.
    dangling_job = spark_job_with_run_factory(
        start_date=one_hour_ahead,
        job_timeout=2,
        run__status=Cluster.STATUS_WAITING,
        run__scheduled_at=one_hour_ago,
    )
    mocker.spy(tasks.run_job, "terminate_and_notify")
    mocker.patch(
        "atmo.clusters.provisioners.ClusterProvisioner.info",
        return_value={
            "creation_datetime": now,
            "ready_datetime": None,
            "end_datetime": None,
            "state": Cluster.STATUS_WAITING,
            "public_dns": None,
        },
    )
    terminate_mock = mocker.patch("atmo.jobs.models.SparkJob.terminate")

    assert not dangling_job.has_finished
    assert not dangling_job.has_timed_out
    assert terminate_mock.call_count == 0

    # The task raises Retry so the scheduler tries running again later.
    with pytest.raises(Retry):
        tasks.run_job(dangling_job.pk)

    # Neither termination path may fire for a run that hasn't timed out.
    assert tasks.run_job.terminate_and_notify.call_count == 0
    assert terminate_mock.call_count == 0
def test_run_job_timed_out_job(
    mocker, now, one_hour_ahead, spark_job_with_run_factory
):
    """A run past its timeout is terminated and the owner is notified."""
    # Scheduled two hours ago with a one-hour timeout: definitively timed out.
    timed_out_job = spark_job_with_run_factory(
        start_date=one_hour_ahead,
        job_timeout=1,
        run__status=Cluster.STATUS_WAITING,
        run__scheduled_at=now - timedelta(hours=2),
    )
    mocker.spy(tasks.run_job, "terminate_and_notify")
    mocker.patch(
        "atmo.clusters.provisioners.ClusterProvisioner.info",
        return_value={
            "creation_datetime": now,
            "ready_datetime": None,
            "end_datetime": None,
            "state": Cluster.STATUS_WAITING,
            "public_dns": None,
        },
    )
    terminate_mock = mocker.patch("atmo.jobs.models.SparkJob.terminate")

    assert not timed_out_job.has_finished
    assert timed_out_job.has_timed_out
    assert terminate_mock.call_count == 0

    tasks.run_job(timed_out_job.pk)

    # The timed-out path must call terminate exactly once.
    assert tasks.run_job.terminate_and_notify.call_count == 1
    assert terminate_mock.call_count == 1
def test_run_job_with_run_status_updated(mocker, spark_job_with_run_factory,
                                         cluster_provisioner_mocks):
    """When a previous run exists, run_job syncs it and refreshes before running."""
    # NOTE(review): this definition is shadowed by a later, behaviorally
    # identical `test_run_job_with_run_status_updated` in this module, so
    # pytest never collects this copy — the duplicate should be removed.
    run = mocker.patch('atmo.jobs.models.SparkJob.run')
    refresh_from_db = mocker.patch('atmo.jobs.models.SparkJob.refresh_from_db')
    spark_job_with_run = spark_job_with_run_factory(
        run__status=Cluster.STATUS_TERMINATED, )

    assert spark_job_with_run.latest_run
    mocker.spy(tasks.run_job, 'sync_run')
    sync = mocker.patch('atmo.jobs.models.SparkJobRun.sync')

    tasks.run_job(spark_job_with_run.pk)

    # With an existing run: sync it, refresh the job, then run again.
    assert tasks.run_job.sync_run.call_count == 1
    assert sync.call_count == 1
    assert refresh_from_db.call_count == 1
    assert run.call_count == 1
def test_run_job_with_run_status_updated(
    mocker, spark_job_with_run_factory, cluster_provisioner_mocks
):
    """When a previous run exists, run_job syncs it and refreshes before running."""
    run_mock = mocker.patch('atmo.jobs.models.SparkJob.run')
    refresh_mock = mocker.patch('atmo.jobs.models.SparkJob.refresh_from_db')
    job = spark_job_with_run_factory(run__status=Cluster.STATUS_TERMINATED)
    assert job.latest_run

    mocker.spy(tasks.run_job, 'sync_run')
    sync_mock = mocker.patch('atmo.jobs.models.SparkJobRun.sync')

    tasks.run_job(job.pk)

    # With an existing run: sync it, refresh the job, then run again.
    assert tasks.run_job.sync_run.call_count == 1
    assert sync_mock.call_count == 1
    assert refresh_mock.call_count == 1
    assert run_mock.call_count == 1
def test_run_job_without_run_status_updated(mocker, spark_job,
                                            cluster_provisioner_mocks):
    """Without a previous run, run_job skips sync/refresh but still runs the job."""
    # NOTE(review): this definition is shadowed by a later, behaviorally
    # identical `test_run_job_without_run_status_updated` in this module, so
    # pytest never collects this copy — the duplicate should be removed.
    run = mocker.patch('atmo.jobs.models.SparkJob.run')
    refresh_from_db = mocker.patch('atmo.jobs.models.SparkJob.refresh_from_db')

    assert not spark_job.latest_run
    mocker.spy(tasks.run_job, 'sync_run')
    sync = mocker.patch('atmo.jobs.models.SparkJobRun.sync')

    tasks.run_job(spark_job.pk)

    # tries to update the status
    assert tasks.run_job.sync_run.call_count == 1
    # update does not really do it, since there wasn't a previous run
    assert sync.call_count == 0
    # no need to refresh the object
    assert refresh_from_db.call_count == 0
    # but run anyway
    assert run.call_count == 1
def test_run_job_without_run_status_updated(
    mocker, spark_job, cluster_provisioner_mocks
):
    """Without a previous run, run_job skips sync/refresh but still runs the job."""
    run_mock = mocker.patch('atmo.jobs.models.SparkJob.run')
    refresh_mock = mocker.patch('atmo.jobs.models.SparkJob.refresh_from_db')
    assert not spark_job.latest_run

    mocker.spy(tasks.run_job, 'sync_run')
    sync_mock = mocker.patch('atmo.jobs.models.SparkJobRun.sync')

    tasks.run_job(spark_job.pk)

    # The sync step is attempted once...
    assert tasks.run_job.sync_run.call_count == 1
    # ...but does nothing, since there was no previous run to sync,
    assert sync_mock.call_count == 0
    # so no refresh of the job object is needed either.
    assert refresh_mock.call_count == 0
    # The job is still executed.
    assert run_mock.call_count == 1
def test_run_job_not_exists():
    """Running an unknown Spark job pk raises SparkJobNotFound."""
    # NOTE(review): this definition is shadowed by an identical
    # `test_run_job_not_exists` defined immediately below, so pytest never
    # collects this copy — the duplicate should be removed.
    with pytest.raises(exceptions.SparkJobNotFound):
        tasks.run_job(1234)
def test_run_job_not_exists():
    """Running an unknown Spark job pk raises SparkJobNotFound."""
    missing_pk = 1234
    with pytest.raises(exceptions.SparkJobNotFound):
        tasks.run_job(missing_pk)